diff --git a/src/core/algorithm/flat_sparse/flat_sparse_streamer.cc b/src/core/algorithm/flat_sparse/flat_sparse_streamer.cc index 4df83d8f7..bc5a3da17 100644 --- a/src/core/algorithm/flat_sparse/flat_sparse_streamer.cc +++ b/src/core/algorithm/flat_sparse/flat_sparse_streamer.cc @@ -195,8 +195,10 @@ int FlatSparseStreamer::add_impl(uint64_t pkey, const uint32_t sparse_count, } if (ailego_unlikely(sparse_count > PARAM_FLAT_SPARSE_MAX_DIM_SIZE)) { - LOG_ERROR("Add vector failed, dim size too larg, dim_size=%u, key=%zu", - sparse_count, (size_t)pkey); + LOG_ERROR( + "Failed to add sparse vector: number of non-zero elements (%u) exceeds " + "maximum allowed (%u), key=%zu", + sparse_count, PARAM_FLAT_SPARSE_MAX_DIM_SIZE, (size_t)pkey); (*stats_.mutable_discarded_count())++; return IndexError_InvalidValue; } @@ -252,8 +254,10 @@ int FlatSparseStreamer::add_with_id_impl(uint32_t pkey, } if (ailego_unlikely(sparse_count > PARAM_FLAT_SPARSE_MAX_DIM_SIZE)) { - LOG_ERROR("Add vector failed, dim size too larg, dim_size=%u, key=%zu", - sparse_count, (size_t)pkey); + LOG_ERROR( + "Failed to add sparse vector: number of non-zero elements (%u) exceeds " + "maximum allowed (%u), key=%zu", + sparse_count, PARAM_FLAT_SPARSE_MAX_DIM_SIZE, (size_t)pkey); (*stats_.mutable_discarded_count())++; return IndexError_InvalidValue; } diff --git a/src/core/algorithm/flat_sparse/flat_sparse_utility.h b/src/core/algorithm/flat_sparse/flat_sparse_utility.h index 4566b5dee..a66e2f631 100644 --- a/src/core/algorithm/flat_sparse/flat_sparse_utility.h +++ b/src/core/algorithm/flat_sparse/flat_sparse_utility.h @@ -19,7 +19,7 @@ namespace zvec { namespace core { -static constexpr uint32_t PARAM_FLAT_SPARSE_MAX_DIM_SIZE = 4096; +static constexpr uint32_t PARAM_FLAT_SPARSE_MAX_DIM_SIZE = 16384; static const std::string PARAM_FLAT_SPARSE_META_SEG_ID = "bruteforce_sparse_meta"; diff --git a/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc b/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc index 48c20d726..25c5c00cd 100644 --- a/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc +++ b/src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc @@ -88,9 +88,11 @@ int HnswSparseBuilderEntity::add_vector(level_t level, key_t key, const uint32_t sparse_count, const uint32_t *sparse_indices, const void *sparse_vec, node_id_t *id) { - if (ailego_unlikely(sparse_count >= HnswSparseEntity::kSparseMaxDimSize)) { - LOG_WARN("Add vector failed, dim size too larg, dim_size=%u, key=%zu", - sparse_count, (size_t)key); + if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) { + LOG_WARN( + "Failed to add sparse vector: number of non-zero elements (%u) exceeds " + "maximum allowed (%u), key=%zu", + sparse_count, HnswSparseEntity::kSparseMaxDimSize, (size_t)key); return IndexError_InvalidValue; } diff --git a/src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h b/src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h index 37166027e..d514e0c2e 100644 --- a/src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h +++ b/src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h @@ -610,7 +610,7 @@ class HnswSparseEntity { constexpr static uint32_t kSparseMetaSize = 2u * sizeof(uint64_t); constexpr static float kDefaultSparseNeighborRatio = 0.5f; - constexpr static uint32_t kSparseMaxDimSize = 4096; + constexpr static uint32_t kSparseMaxDimSize = 16384; constexpr static float kDefaultQueryFilteringRatio = 0.0f; // turn off protected: diff --git a/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc b/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc index f51ebb5ea..3abce8087 100644 --- a/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc +++ b/src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc @@ -438,9 +438,11 @@ int HnswSparseStreamer::add_with_id_impl(uint32_t id, return ret; } - if (ailego_unlikely(sparse_count >= HnswSparseEntity::kSparseMaxDimSize)) { - LOG_WARN("Add vector failed, dim size too larg, dim_size=%u, id=%u", - sparse_count, id); + if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) { + LOG_WARN( + "Failed to add sparse vector: number of non-zero elements (%u) exceeds " + "maximum allowed (%u), id=%u", + sparse_count, HnswSparseEntity::kSparseMaxDimSize, id); return IndexError_InvalidValue; } @@ -523,9 +525,11 @@ int HnswSparseStreamer::add_impl(uint64_t pkey, const uint32_t sparse_count, return ret; } - if (ailego_unlikely(sparse_count >= HnswSparseEntity::kSparseMaxDimSize)) { - LOG_WARN("Add vector failed, dim size too larg, dim_size=%u, key=%zu", - sparse_count, (size_t)pkey); + if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) { + LOG_WARN( + "Failed to add sparse vector: number of non-zero elements (%u) exceeds " + "maximum allowed (%u), key=%zu", + sparse_count, HnswSparseEntity::kSparseMaxDimSize, (size_t)pkey); return IndexError_InvalidValue; } diff --git a/src/db/common/constants.h b/src/db/common/constants.h index 39b13f445..d07512f3b 100644 --- a/src/db/common/constants.h +++ b/src/db/common/constants.h @@ -32,7 +32,7 @@ const std::string GLOBAL_DOC_ID = "_zvec_g_doc_id_"; const std::string USER_ID = "_zvec_uid_"; -const int kSparseMaxDimSize = 4096; +const int kSparseMaxDimSize = 16384; const int64_t kMaxRecordBatchNumRows = 4096; diff --git a/src/db/index/common/doc.cc b/src/db/index/common/doc.cc index 6d411bfb2..2f9f12b03 100644 --- a/src/db/index/common/doc.cc +++ b/src/db/index/common/doc.cc @@ -866,6 +866,12 @@ Status Doc::validate(const CollectionSchema::Ptr &schema, "doc validate failed: field[", field_name, "]'s sparse vector indices and values size not match"); } + if (sparse_indices.size() > kSparseMaxDimSize) { + return Status::InvalidArgument( + "doc validate failed: vector[", field_name, + "], the number of sparse indices exceeds the maximum limit ", + kSparseMaxDimSize); + } } break; } @@ -881,6 +887,12 @@ Status Doc::validate(const CollectionSchema::Ptr &schema, "doc validate failed: field[", field_name, "]'s sparse vector indices and values size not match"); } + if (sparse_indices.size() > kSparseMaxDimSize) { + return Status::InvalidArgument( + "doc validate failed: vector[", field_name, + "], the number of sparse indices exceeds the maximum limit ", + kSparseMaxDimSize); + } } break; } @@ -1251,9 +1263,11 @@ Status VectorQuery::validate(const FieldSchema *schema) const { } } else if (schema->is_sparse_vector()) { // validate sparse indices size - if (query_sparse_indices_.size() >= kSparseMaxDimSize * sizeof(uint32_t)) { + if (query_sparse_indices_.size() > kSparseMaxDimSize * sizeof(uint32_t)) { return Status::InvalidArgument( - "query validate failed: sparse indices size is too large"); + "query validate failed: the number of sparse indices exceeds the " + "maximum limit ", + kSparseMaxDimSize); } } else { return Status::InvalidArgument( diff --git a/tests/db/index/common/doc_test.cc b/tests/db/index/common/doc_test.cc index 5047ecb5d..9098478be 100644 --- a/tests/db/index/common/doc_test.cc +++ b/tests/db/index/common/doc_test.cc @@ -1255,7 +1255,7 @@ TEST(VectorQuery, Validate) { VectorQuery query; query.field_name_ = "field_name"; query.topk_ = 100; - std::vector query_indices = std::vector(4097); + std::vector query_indices = std::vector(16385); std::string query_indices_str = std::string(reinterpret_cast(query_indices.data()), query_indices.size() * sizeof(uint32_t));