diff --git a/apps/common/event/listener_manage.py b/apps/common/event/listener_manage.py index 2b7209ae34c..f9617bbb417 100644 --- a/apps/common/event/listener_manage.py +++ b/apps/common/event/listener_manage.py @@ -230,7 +230,7 @@ def tokenize_by_paragraph(paragraph_id): ) data_list = list(QuerySet(Embedding).filter(paragraph_id=paragraph_id)) for data, chunk in zip(data_list, chunks): - data.search_vector = SearchVector(Value(to_ts_vector(chunk, user_words=user_words))) + data.search_vector = SearchVector(Value(to_ts_vector(chunk, user_words=user_words)), config='simple') # 批量保存,减少数据库写入次数 QuerySet(Embedding).filter(paragraph_id=paragraph_id).bulk_update(data_list, ["search_vector"]) diff --git a/apps/knowledge/vector/pg_vector.py b/apps/knowledge/vector/pg_vector.py index fcd612f52c3..0fc8ed96049 100644 --- a/apps/knowledge/vector/pg_vector.py +++ b/apps/knowledge/vector/pg_vector.py @@ -71,7 +71,7 @@ def _save( source_id=source_id, embedding=text_embedding, source_type=source_type, - search_vector=SearchVector(Value(to_ts_vector(text, user_words=terms))), + search_vector=SearchVector(Value(to_ts_vector(text, user_words=terms)), config='simple'), ) embedding.save() return True @@ -99,7 +99,8 @@ def _batch_save(self, text_list: List[Dict], embedding: Embeddings, is_the_task_ .values_list("content", flat=True) ), ) - ) + ), + config='simple', ), ) for index in range(0, len(texts))