From 8658754f9f910c14473b9e5d39443b68cfb4e9b4 Mon Sep 17 00:00:00 2001 From: zhangzhanwei Date: Mon, 15 Jun 2026 10:39:44 +0800 Subject: [PATCH] fix: use simple config for SearchVector to avoid double stemming --- apps/common/event/listener_manage.py | 2 +- apps/knowledge/vector/pg_vector.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/common/event/listener_manage.py b/apps/common/event/listener_manage.py index 2b7209ae34c..f9617bbb417 100644 --- a/apps/common/event/listener_manage.py +++ b/apps/common/event/listener_manage.py @@ -230,7 +230,7 @@ def tokenize_by_paragraph(paragraph_id): ) data_list = list(QuerySet(Embedding).filter(paragraph_id=paragraph_id)) for data, chunk in zip(data_list, chunks): - data.search_vector = SearchVector(Value(to_ts_vector(chunk, user_words=user_words))) + data.search_vector = SearchVector(Value(to_ts_vector(chunk, user_words=user_words)), config='simple') # 批量保存,减少数据库写入次数 QuerySet(Embedding).filter(paragraph_id=paragraph_id).bulk_update(data_list, ["search_vector"]) diff --git a/apps/knowledge/vector/pg_vector.py b/apps/knowledge/vector/pg_vector.py index fcd612f52c3..0fc8ed96049 100644 --- a/apps/knowledge/vector/pg_vector.py +++ b/apps/knowledge/vector/pg_vector.py @@ -71,7 +71,7 @@ def _save( source_id=source_id, embedding=text_embedding, source_type=source_type, - search_vector=SearchVector(Value(to_ts_vector(text, user_words=terms))), + search_vector=SearchVector(Value(to_ts_vector(text, user_words=terms)), config='simple'), ) embedding.save() return True @@ -99,7 +99,8 @@ def _batch_save(self, text_list: List[Dict], embedding: Embeddings, is_the_task_ .values_list("content", flat=True) ), ) - ) + ), + config='simple', ), ) for index in range(0, len(texts))