1- """Helpers for codebase_analyze — file walking, hashing, entity persistence.
2-
3- Extracted from codebase_analyze.py to keep each file under 300 lines
4- and each function under 40 lines.
5- """
1+ """Helpers for codebase_analyze — file walking, hashing, entity persistence."""
62
73from __future__ import annotations
84
@@ -60,12 +56,22 @@ def collect_source_files(
 # ── Hash-based change detection ───────────────────────────────────────────
 
 
-def load_existing_hashes(store: MemoryStore) -> dict[str, tuple[int, str]]:
-    """Load existing codebase memory hashes.
+def _parse_tags(raw: object) -> list:
+    """Parse tags from a list (PG) or JSON string (SQLite)."""
+    if isinstance(raw, list):
+        return raw
+    if isinstance(raw, str):
+        import json
+
+        try:
+            return json.loads(raw)
+        except (ValueError, TypeError):
+            return []
+    return []
+
 
-    Returns:
-        Dict mapping file_path to (memory_id, content_hash).
-    """
+def load_existing_hashes(store: MemoryStore) -> dict[str, tuple[int, str]]:
+    """Load existing codebase memory hashes: {path: (id, hash)}."""
     hashes: dict[str, tuple[int, str]] = {}
     try:
         rows = store._conn.execute(
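The two branches of `_parse_tags` map to how each backend returns the `tags` column: psycopg decodes a jsonb column to a Python list, while sqlite3 hands back the raw TEXT as a JSON string. A quick illustration of all three paths (sample values are made up):

```python
assert _parse_tags(["file:a.py"]) == ["file:a.py"]    # PG: jsonb already decoded to a list
assert _parse_tags('["file:a.py"]') == ["file:a.py"]  # SQLite: JSON text, decoded here
assert _parse_tags("not json") == []                  # malformed input falls back to empty
```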
@@ -74,7 +80,7 @@ def load_existing_hashes(store: MemoryStore) -> dict[str, tuple[int, str]]:
         ).fetchall()
         for row in rows:
             mem_id = row["id"]
-            tags = row["tags"] if isinstance(row["tags"], list) else []
+            tags = _parse_tags(row["tags"])
             file_path, content_hash = _extract_file_hash(tags)
             if file_path and content_hash:
                 hashes[file_path] = (mem_id, content_hash)
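`_extract_file_hash` is referenced here but is not part of this diff. A plausible sketch, assuming tags carry `file:<path>` and `hash:<digest>` prefixes; the `file:` prefix is visible in the old `persist_community_tags` query below, while `hash:` is an assumption:

```python
from __future__ import annotations

def _extract_file_hash(tags: list) -> tuple[str | None, str | None]:
    # Pull the file path and content hash out of prefixed tags, if present.
    file_path = content_hash = None
    for tag in tags:
        if not isinstance(tag, str):
            continue
        if tag.startswith("file:"):
            file_path = tag[len("file:"):]
        elif tag.startswith("hash:"):
            content_hash = tag[len("hash:"):]
    return file_path, content_hash
```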
@@ -192,11 +198,7 @@ def persist_entities(
     memory_id: int,
     domain: str,
 ) -> tuple[int, int]:
-    """Persist file entity, symbols, and imports to knowledge graph.
-
-    Returns:
-        Tuple of (entities_created, relationships_created).
-    """
+    """Persist file entity, symbols, and imports. Returns (entities, rels)."""
     entities, relationships = 0, 0
     try:
         file_eid = _get_or_create_entity(store, analysis.path, "file", domain)
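A hypothetical call site for `persist_entities`, assuming the elided leading parameters are `store` and an `analysis` object with a `.path` attribute (both implied by the body above); the values are illustrative:

```python
entities, rels = persist_entities(store, analysis, memory_id=42, domain="backend")
print(f"knowledge graph: +{entities} entities, +{rels} relationships")
```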
@@ -273,17 +275,25 @@ def persist_community_tags(
     communities: dict[str, int],
 ) -> None:
     """Tag codebase memories with their community cluster ID."""
+    import json
+
     for file_path, cluster_id in communities.items():
         try:
-            store._conn.execute(
-                "UPDATE memories SET tags = tags || %s::jsonb "
-                "WHERE agent_context = 'codebase' "
-                "AND tags @> %s::jsonb AND NOT is_stale",
-                (
-                    f'["cluster:{cluster_id}"]',
-                    f'["file:{file_path}"]',
-                ),
-            )
+            rows = store._conn.execute(
+                "SELECT id, tags FROM memories "
+                "WHERE agent_context = 'codebase' AND NOT is_stale "
+                "AND content LIKE %s",
+                (f"%{file_path}%",),
+            ).fetchall()
+            for row in rows:
+                tags = _parse_tags(row["tags"])
+                tag = f"cluster:{cluster_id}"
+                if tag not in tags:
+                    tags.append(tag)
+                store._conn.execute(
+                    "UPDATE memories SET tags = %s WHERE id = %s",
+                    (json.dumps(tags), row["id"]),
+                )
         except Exception:
             pass
     if communities:
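This hunk trades the PG-only jsonb operators (`||` append, `@>` containment) for a portable SELECT, merge-in-Python, UPDATE cycle that also works on SQLite; the match is now a broad `content LIKE` rather than an exact tag containment. Illustrative usage, assuming cluster IDs come from an upstream community-detection pass (not shown in this diff):

```python
communities = {"src/app.py": 0, "src/db.py": 0, "src/cli.py": 1}
persist_community_tags(store, communities)
```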