diff --git a/codegraph/analysis/context_builder.py b/codegraph/analysis/context_builder.py index 92de41c..1798133 100644 --- a/codegraph/analysis/context_builder.py +++ b/codegraph/analysis/context_builder.py @@ -16,9 +16,6 @@ import sqlite3 from dataclasses import dataclass, field -import kuzu - -from codegraph.core.utils import rows as _rows from codegraph.core.fts import fts_search @@ -247,7 +244,7 @@ def _keyword_query(task: str, min_len: int = 3) -> str: def context_for_task( task: str, - kuzu_conn: kuzu.Connection, + kuzu_conn, fts_conn: sqlite3.Connection, max_nodes: int = 15, ) -> TaskContext: @@ -297,35 +294,32 @@ def context_for_task( relevance=min(relevance, 1.0), ) - # Step 3: Expand via graph — find callers/callees + # Step 3: Expand via graph — find callers/callees (up to 3 each) if r.kind == "function": - callers = _rows( - kuzu_conn.execute( - "MATCH (caller:Function)-[:CALLS]->(fn:Function) WHERE fn.name = $n RETURN caller.name LIMIT 3", - {"n": r.name}, - ) - ) + callers = kuzu_conn.find_neighbors( + "CALLS", + dst_where={"name": r.name}, + return_src=["name"], + )[:3] for c in callers: - node.relationships.append(f"called by {c['caller.name']}") + node.relationships.append(f"called by {c['src_name']}") - callees = _rows( - kuzu_conn.execute( - "MATCH (fn:Function)-[:CALLS]->(callee:Function) WHERE fn.name = $n RETURN callee.name LIMIT 3", - {"n": r.name}, - ) - ) + callees = kuzu_conn.find_neighbors( + "CALLS", + src_where={"name": r.name}, + return_dst=["name"], + )[:3] for c in callees: - node.relationships.append(f"calls {c['callee.name']}") + node.relationships.append(f"calls {c['dst_name']}") elif r.kind == "class": - parents = _rows( - kuzu_conn.execute( - "MATCH (c:Class)-[:INHERITS]->(p:Class) WHERE c.name = $n RETURN p.name LIMIT 3", - {"n": r.name}, - ) - ) + parents = kuzu_conn.find_neighbors( + "INHERITS", + src_where={"name": r.name}, + return_dst=["name"], + )[:3] for p in parents: - node.relationships.append(f"extends {p['p.name']}") + node.relationships.append(f"extends {p['dst_name']}") nodes.append(node) diff --git a/codegraph/analysis/dead_code.py b/codegraph/analysis/dead_code.py index 708fb0b..7f2fc55 100644 --- a/codegraph/analysis/dead_code.py +++ b/codegraph/analysis/dead_code.py @@ -13,10 +13,6 @@ from dataclasses import dataclass -import kuzu - -from codegraph.core.utils import rows as _rows - # Entry-point names that are never "dead" even without incoming edges _ENTRY_POINTS = { "__init__", @@ -45,60 +41,50 @@ class DeadSymbol: def find_dead_code( - conn: kuzu.Connection, + conn, include_private: bool = False, file_filter: str | None = None, ) -> list[DeadSymbol]: """Find functions and classes with no incoming CALLS / INHERITS edges.""" dead: list[DeadSymbol] = [] + contains = {"file_path": file_filter} if file_filter else None - # Dead functions: no CALLS edge pointing to them, not an entry point - where_clauses = ["NOT (fn)<-[:CALLS]-()"] - if not include_private: - where_clauses.append("NOT fn.name STARTS WITH '_'") - if file_filter: - where_clauses.append("fn.file_path CONTAINS $ff") - - where = " AND ".join(where_clauses) - params = {"ff": file_filter} if file_filter else {} - - r = conn.execute( - f"MATCH (fn:Function) WHERE {where} RETURN fn.name, fn.file_path, fn.start_line, fn.end_line", - params, - ) - for row in _rows(r): - name = row["fn.name"] + # Dead functions: no incoming CALLS edge, not an entry point, not private + for row in conn.find_nodes_without_incoming( + "Function", + "CALLS", + contains=contains, + exclude_name_prefix=None if include_private else "_", + return_fields=["name", "file_path", "start_line", "end_line"], + ): + name = row["name"] if name in _ENTRY_POINTS: continue dead.append( DeadSymbol( kind="function", name=name, - file_path=row["fn.file_path"], - start_line=row["fn.start_line"], - end_line=row["fn.end_line"], + file_path=row["file_path"], + start_line=row["start_line"], + end_line=row["end_line"], reason="no callers", ) ) - # Dead classes: no INHERITS edge pointing to them, no HAS_METHOD usage - where_clauses_cls = ["NOT (c)<-[:INHERITS]-()"] - if file_filter: - where_clauses_cls.append("c.file_path CONTAINS $ff") - where_cls = " AND ".join(where_clauses_cls) - - r = conn.execute( - f"MATCH (c:Class) WHERE {where_cls} RETURN c.name, c.file_path, c.start_line, c.end_line", - params, - ) - for row in _rows(r): + # Dead classes: no incoming INHERITS edge + for row in conn.find_nodes_without_incoming( + "Class", + "INHERITS", + contains=contains, + return_fields=["name", "file_path", "start_line", "end_line"], + ): dead.append( DeadSymbol( kind="class", - name=row["c.name"], - file_path=row["c.file_path"], - start_line=row["c.start_line"], - end_line=row["c.end_line"], + name=row["name"], + file_path=row["file_path"], + start_line=row["start_line"], + end_line=row["end_line"], reason="no subclasses", ) ) diff --git a/codegraph/core/db_duckdb.py b/codegraph/core/db_duckdb.py index 2392bc2..318de83 100644 --- a/codegraph/core/db_duckdb.py +++ b/codegraph/core/db_duckdb.py @@ -262,6 +262,7 @@ def find_nodes( where: dict[str, Any] | None = None, contains: dict[str, Any] | None = None, return_fields: list[str] | None = None, + order_by: list[str] | None = None, limit: int | None = None, ) -> list[dict[str, Any]]: from codegraph.core.graph_model import NODES @@ -288,9 +289,59 @@ def find_nodes( select_clause = ", ".join(return_fields) if return_fields and return_fields != ["*"] else "*" where_clause = "WHERE " + " AND ".join(clauses) if clauses else "" + order_clause = "ORDER BY " + ", ".join(order_by) if order_by else "" limit_clause = f"LIMIT {int(limit)}" if limit else "" - sql = f"SELECT {select_clause} FROM {spec.table} {where_clause} {limit_clause}" + sql = ( + f"SELECT {select_clause} FROM {spec.table} " + f"{where_clause} {order_clause} {limit_clause}" + ) + + cursor = self._conn.execute(sql, params) + cols = [d[0] for d in (cursor.description or [])] + return [dict(zip(cols, row)) for row in cursor.fetchall()] + + def find_nodes_without_incoming( + self, + label: str, + edge_type: str, + contains: dict[str, Any] | None = None, + exclude_name_prefix: str | None = None, + return_fields: list[str] | None = None, + ) -> list[dict[str, Any]]: + from codegraph.core.graph_model import EDGES, NODES + if label not in NODES: + raise ValueError(f"Unknown node label: {label!r}") + if edge_type not in EDGES: + raise ValueError(f"Unknown edge type: {edge_type!r}") + spec = NODES[label] + edge = EDGES[edge_type] + + if not return_fields: + return_fields = [spec.key_field, "name", "file_path", "start_line", "end_line"] + + params: list[Any] = [] + clauses: list[str] = [] + # LEFT ANTI-JOIN via NOT EXISTS — works on every DuckDB version. + clauses.append( + f"NOT EXISTS (SELECT 1 FROM {edge.table} e WHERE e.{edge.dst_column} = n.{spec.key_field})" + ) + if exclude_name_prefix: + # Plain SUBSTR avoids LIKE wildcard collision (`_` and `%` are + # LIKE metachars in DuckDB; user code that calls this with + # exclude_name_prefix='_' wants the literal underscore). + clauses.append("substr(n.name, 1, ?) <> ?") + params.append(len(exclude_name_prefix)) + params.append(exclude_name_prefix) + if contains: + for field, value in contains.items(): + clauses.append(f"n.{field} LIKE ?") + params.append(f"%{value}%") + + select_clause = ", ".join(f"n.{f}" for f in return_fields) + sql = ( + f"SELECT {select_clause} FROM {spec.table} n WHERE {' AND '.join(clauses)}" + ) cursor = self._conn.execute(sql, params) cols = [d[0] for d in (cursor.description or [])] return [dict(zip(cols, row)) for row in cursor.fetchall()] diff --git a/codegraph/core/db_kuzu.py b/codegraph/core/db_kuzu.py index 681f329..6afba62 100644 --- a/codegraph/core/db_kuzu.py +++ b/codegraph/core/db_kuzu.py @@ -212,6 +212,7 @@ def find_nodes( where: dict[str, Any] | None = None, contains: dict[str, Any] | None = None, return_fields: list[str] | None = None, + order_by: list[str] | None = None, limit: int | None = None, ) -> list[dict[str, Any]]: from codegraph.core.graph_model import NODES @@ -248,8 +249,14 @@ def find_nodes( return_clause = ", ".join(f"n.{f} AS {f}" for f in return_fields) where_clause = "WHERE " + " AND ".join(clauses) if clauses else "" + order_clause = ( + "ORDER BY " + ", ".join(f"n.{f}" for f in order_by) if order_by else "" + ) limit_clause = f"LIMIT {int(limit)}" if limit else "" - cypher = f"MATCH (n:{label}) {where_clause} RETURN {return_clause} {limit_clause}" + cypher = ( + f"MATCH (n:{label}) {where_clause} " + f"RETURN {return_clause} {order_clause} {limit_clause}" + ) result = self._inner.execute(cypher, params) if params else self._inner.execute(cypher) cols = result.get_column_names() @@ -260,6 +267,47 @@ def find_nodes( out.append({c.removeprefix("n."): v for c, v in zip(cols, row)}) return out + def find_nodes_without_incoming( + self, + label: str, + edge_type: str, + contains: dict[str, Any] | None = None, + exclude_name_prefix: str | None = None, + return_fields: list[str] | None = None, + ) -> list[dict[str, Any]]: + from codegraph.core.graph_model import EDGES, NODES + + if label not in NODES: + raise ValueError(f"Unknown node label: {label!r}") + if edge_type not in EDGES: + raise ValueError(f"Unknown edge type: {edge_type!r}") + spec = NODES[label] + + params: dict[str, Any] = {} + clauses: list[str] = [f"NOT (n)<-[:{edge_type}]-()"] + if exclude_name_prefix: + clauses.append("NOT n.name STARTS WITH $_pref") + params["_pref"] = exclude_name_prefix + if contains: + for i, (field, value) in enumerate(contains.items()): + bind = f"_c{i}" + clauses.append(f"n.{field} CONTAINS ${bind}") + params[bind] = value + + if not return_fields: + return_fields = [spec.key_field, "name", "file_path", "start_line", "end_line"] + return_clause = ", ".join(f"n.{f} AS {f}" for f in return_fields) + cypher = ( + f"MATCH (n:{label}) WHERE {' AND '.join(clauses)} RETURN {return_clause}" + ) + result = self._inner.execute(cypher, params) if params else self._inner.execute(cypher) + cols = result.get_column_names() + out: list[dict[str, Any]] = [] + while result.has_next(): + row = result.get_next() + out.append(dict(zip(cols, row))) + return out + def find_neighbors( self, edge_type: str, diff --git a/codegraph/core/protocol.py b/codegraph/core/protocol.py index 4d76bc1..190570b 100644 --- a/codegraph/core/protocol.py +++ b/codegraph/core/protocol.py @@ -142,6 +142,7 @@ def find_nodes( where: dict[str, Any] | None = None, contains: dict[str, Any] | None = None, return_fields: list[str] | None = None, + order_by: list[str] | None = None, limit: int | None = None, ) -> list[dict[str, Any]]: """Return matching nodes as a list of field-keyed dicts. @@ -150,12 +151,33 @@ def find_nodes( - ``contains``: ``{field: value}`` substring search, OR across fields. - ``return_fields``: which columns to include. None = all columns for the label. + - ``order_by``: list of field names to sort ascending by. - ``limit``: optional row cap. Used by symbol_lookup, search_symbols, doc search, and similar "find me nodes matching X" tools. """ + def find_nodes_without_incoming( + self, + label: str, + edge_type: str, + contains: dict[str, Any] | None = None, + exclude_name_prefix: str | None = None, + return_fields: list[str] | None = None, + ) -> list[dict[str, Any]]: + """Return nodes of ``label`` that have no incoming edge of + ``edge_type``. + + ``exclude_name_prefix`` (e.g. ``"_"``) filters out names starting + with that prefix before the result is returned — used by the + dead-code detector to skip dunder / underscore methods that + wouldn't be called from outside. + + Used by analysis.dead_code to find functions with no callers and + classes with no subclasses. + """ + def find_neighbors( self, edge_type: str, diff --git a/codegraph/server/tools_arch.py b/codegraph/server/tools_arch.py index 0bd88e0..1eb7015 100644 --- a/codegraph/server/tools_arch.py +++ b/codegraph/server/tools_arch.py @@ -14,7 +14,6 @@ import json from collections import defaultdict -from codegraph.core.utils import rows as _rows from codegraph.core.utils import short_path as _short_path @@ -64,13 +63,13 @@ def architecture_overview(max_files_per_role: int = 10) -> str: def query(conn): try: - r = conn.execute( - "MATCH (f:File) RETURN f.path, f.lang, f.role, f.layer, f.module_doc " - "ORDER BY f.layer, f.role, f.path" + return conn.find_nodes( + "File", + return_fields=["path", "lang", "role", "layer", "module_doc"], + order_by=["layer", "role", "path"], ) except RuntimeError: return [] - return _rows(r) per_scope = _query_each_kuzu(query) scopes_out: dict[str, dict] = {} @@ -80,13 +79,13 @@ def query(conn): for scope, rows in per_scope: by_layer: dict[str, dict[str, list[dict]]] = defaultdict(lambda: defaultdict(list)) for row in rows: - layer = row.get("f.layer") or "other" - role = row.get("f.role") or "other" + layer = row.get("layer") or "other" + role = row.get("role") or "other" by_layer[layer][role].append( { - "path": _short_path(row["f.path"], _srv._root), - "lang": row.get("f.lang"), - "module_doc": (row.get("f.module_doc") or "")[:180], + "path": _short_path(row["path"], _srv._root), + "lang": row.get("lang"), + "module_doc": (row.get("module_doc") or "")[:180], } ) for layer_dict in by_layer.values(): @@ -126,19 +125,22 @@ def domain_map(keyword: str, limit_per_role: int = 8) -> str: def query(conn): try: - r = conn.execute("MATCH (f:File) RETURN f.path, f.lang, f.role, f.layer, f.module_doc") + files = conn.find_nodes( + "File", + return_fields=["path", "lang", "role", "layer", "module_doc"], + ) except RuntimeError: return [] out = [] - for row in _rows(r): - p = row["f.path"] - doc = row.get("f.module_doc") or "" - role = row.get("f.role") or "other" + for row in files: + p = row["path"] + doc = row.get("module_doc") or "" + role = row.get("role") or "other" if k in p.lower() or k in doc.lower() or k == (role or "").lower(): out.append( { "path": _short_path(p, _srv._root), - "layer": row.get("f.layer"), + "layer": row.get("layer"), "module_doc": doc[:160], "role": role, } @@ -185,34 +187,44 @@ def endpoints(path_pattern: str = "", method: str = "") -> str: def query(conn): try: - r = conn.execute( - "MATCH (e:Endpoint) " - "OPTIONAL MATCH (e)-[:IMPLEMENTED_BY]->(fn:Function) " - "RETURN e.method, e.path, e.framework, e.file_path, e.start_line, fn.name " - "ORDER BY e.path, e.method" + eps = conn.find_nodes( + "Endpoint", + return_fields=["id", "method", "path", "framework", "file_path", "start_line"], + order_by=["path", "method"], ) except RuntimeError: return [] - return _rows(r) + # OPTIONAL MATCH equivalent: for each endpoint, look up handler + # name via the IMPLEMENTED_BY edge. None when there's no handler. + out = [] + for ep in eps: + handlers = conn.find_neighbors( + "IMPLEMENTED_BY", + src_key=ep["id"], + return_dst=["name"], + ) + ep["handler_name"] = handlers[0]["dst_name"] if handlers else None + out.append(ep) + return out per_scope = _query_each_kuzu(query) grouped: dict[str, list[dict]] = defaultdict(list) for scope, rows in per_scope: for row in rows: - url = row.get("e.path") or "" - mth = row.get("e.method") or "" + url = row.get("path") or "" + mth = row.get("method") or "" if method_filter and mth != method_filter: continue if path_pattern and not fnmatch.fnmatch(url, path_pattern): continue - grouped[row.get("e.framework") or "unknown"].append( + grouped[row.get("framework") or "unknown"].append( { "scope": scope, "method": mth, "path": url, - "handler": row.get("fn.name"), - "file": _short_path(row["e.file_path"], _srv._root), - "line": row.get("e.start_line"), + "handler": row.get("handler_name"), + "file": _short_path(row["file_path"], _srv._root), + "line": row.get("start_line"), } ) diff --git a/codegraph/server/tools_docs.py b/codegraph/server/tools_docs.py index 8928e90..8c18d5c 100644 --- a/codegraph/server/tools_docs.py +++ b/codegraph/server/tools_docs.py @@ -11,9 +11,6 @@ import json import os -from codegraph.core.utils import rows as _rows - - def register(mcp) -> None: """Register documentation tools on the given FastMCP instance.""" import codegraph.server as _srv @@ -45,53 +42,47 @@ def search_docs(query: str, limit: int = 10) -> str: q_str = query # capture before shadowing + section_fields = [ + "title", "level", "file_path", "start_line", "end_line", + "body_preview", "anchor", + ] + + def _format(row): + return { + "title": row["title"], + "level": row["level"], + "file": row["file_path"], + "lines": f"{row['start_line']}-{row['end_line']}", + "preview": (row.get("body_preview") or "")[:200], + "anchor": row["anchor"], + } + def run(conn): out: list[dict] = [] seen_ids: set[str] = set() - r = conn.execute( - "MATCH (s:MdSection) WHERE s.title CONTAINS $q " - "RETURN s.title, s.level, s.file_path, s.start_line, s.end_line, " - "s.body_preview, s.anchor LIMIT $lim", - {"q": q_str, "lim": limit}, - ) - for row in _rows(r): - key = f"{row['s.file_path']}:{row['s.start_line']}" + for row in conn.find_nodes( + "MdSection", + contains={"title": q_str}, + return_fields=section_fields, + limit=limit, + ): + key = f"{row['file_path']}:{row['start_line']}" if key in seen_ids: continue seen_ids.add(key) - out.append( - { - "title": row["s.title"], - "level": row["s.level"], - "file": row["s.file_path"], - "lines": f"{row['s.start_line']}-{row['s.end_line']}", - "preview": row["s.body_preview"][:200] if row["s.body_preview"] else "", - "anchor": row["s.anchor"], - } - ) + out.append(_format(row)) if len(out) < limit: - remaining = limit - len(out) - r = conn.execute( - "MATCH (s:MdSection) WHERE s.body_preview CONTAINS $q " - "RETURN s.title, s.level, s.file_path, s.start_line, s.end_line, " - "s.body_preview, s.anchor LIMIT $lim", - {"q": q_str, "lim": remaining}, - ) - for row in _rows(r): - key = f"{row['s.file_path']}:{row['s.start_line']}" + for row in conn.find_nodes( + "MdSection", + contains={"body_preview": q_str}, + return_fields=section_fields, + limit=limit - len(out), + ): + key = f"{row['file_path']}:{row['start_line']}" if key in seen_ids: continue seen_ids.add(key) - out.append( - { - "title": row["s.title"], - "level": row["s.level"], - "file": row["s.file_path"], - "lines": f"{row['s.start_line']}-{row['s.end_line']}", - "preview": row["s.body_preview"][:200] if row["s.body_preview"] else "", - "anchor": row["s.anchor"], - } - ) + out.append(_format(row)) return out results: list[dict] = [] @@ -112,27 +103,26 @@ def doc_outline(file_path: str) -> str: file_path = str(_srv._root / file_path) def query(conn): - r = conn.execute( - "MATCH (s:MdSection) WHERE s.file_path = $p " - "RETURN s.title, s.level, s.start_line, s.end_line, s.anchor " - "ORDER BY s.start_line", - {"p": file_path}, + return conn.find_nodes( + "MdSection", + where={"file_path": file_path}, + return_fields=["title", "level", "start_line", "end_line", "anchor"], + order_by=["start_line"], ) - return _rows(r) outline: list[dict] = [] for scope, rows in _query_each_kuzu(query): for row in rows: - indent = " " * (row["s.level"] - 1) + indent = " " * (row["level"] - 1) outline.append( { "scope": scope, - "title": row["s.title"], - "level": row["s.level"], - "line": row["s.start_line"], - "end_line": row["s.end_line"], - "anchor": row["s.anchor"], - "display": f"{indent}{'#' * row['s.level']} {row['s.title']} (L{row['s.start_line']})", + "title": row["title"], + "level": row["level"], + "line": row["start_line"], + "end_line": row["end_line"], + "anchor": row["anchor"], + "display": f"{indent}{'#' * row['level']} {row['title']} (L{row['start_line']})", } ) if not outline: @@ -149,54 +139,54 @@ def doc_refs(symbol_name: str) -> str: def query(conn): out: list[dict] = [] - r = conn.execute( - """MATCH (s:MdSection)-[r:MD_REFS_SYMBOL]->(fn:Function) - WHERE fn.name = $n - RETURN s.title, s.file_path, s.start_line, s.end_line, r.context""", - {"n": symbol_name}, - ) - for row in _rows(r): + # find_neighbors anchored on dst (the Function/Class side), + # returning src (MdSection) fields + edge context. + for row in conn.find_neighbors( + "MD_REFS_SYMBOL", + dst_where={"name": symbol_name}, + return_src=["title", "file_path", "start_line", "end_line"], + return_edge=["context"], + ): out.append( { - "section": row["s.title"], - "file": row["s.file_path"], - "lines": f"{row['s.start_line']}-{row['s.end_line']}", + "section": row["src_title"], + "file": row["src_file_path"], + "lines": f"{row['src_start_line']}-{row['src_end_line']}", "ref_type": "function", - "context": row["r.context"], + "context": row.get("edge_context") or "", } ) - r = conn.execute( - """MATCH (s:MdSection)-[r:MD_REFS_CLASS]->(c:Class) - WHERE c.name = $n - RETURN s.title, s.file_path, s.start_line, s.end_line, r.context""", - {"n": symbol_name}, - ) - for row in _rows(r): + for row in conn.find_neighbors( + "MD_REFS_CLASS", + dst_where={"name": symbol_name}, + return_src=["title", "file_path", "start_line", "end_line"], + return_edge=["context"], + ): out.append( { - "section": row["s.title"], - "file": row["s.file_path"], - "lines": f"{row['s.start_line']}-{row['s.end_line']}", + "section": row["src_title"], + "file": row["src_file_path"], + "lines": f"{row['src_start_line']}-{row['src_end_line']}", "ref_type": "class", - "context": row["r.context"], + "context": row.get("edge_context") or "", } ) - r = conn.execute( - "MATCH (s:MdSection) WHERE s.body_preview CONTAINS $n " - "RETURN s.title, s.file_path, s.start_line, s.end_line LIMIT 10", - {"n": symbol_name}, - ) seen = {(it["file"], it["lines"]) for it in out} - for row in _rows(r): - key = (row["s.file_path"], f"{row['s.start_line']}-{row['s.end_line']}") + for row in conn.find_nodes( + "MdSection", + contains={"body_preview": symbol_name}, + return_fields=["title", "file_path", "start_line", "end_line"], + limit=10, + ): + key = (row["file_path"], f"{row['start_line']}-{row['end_line']}") if key in seen: continue seen.add(key) out.append( { - "section": row["s.title"], - "file": row["s.file_path"], - "lines": f"{row['s.start_line']}-{row['s.end_line']}", + "section": row["title"], + "file": row["file_path"], + "lines": f"{row['start_line']}-{row['end_line']}", "ref_type": "text_mention", "context": "body", } diff --git a/tests/test_core/test_graph_helpers.py b/tests/test_core/test_graph_helpers.py index e2755f2..ceff715 100644 --- a/tests/test_core/test_graph_helpers.py +++ b/tests/test_core/test_graph_helpers.py @@ -167,6 +167,46 @@ def test_limit(self, graphdb): assert len(rows) == 2 +class TestFindNodesOrderBy: + def test_order_by_field(self, graphdb): + graphdb.upsert_node("Function", "id", "a::z", {"name": "z", "file_path": "/a.py", "start_line": 30}) + graphdb.upsert_node("Function", "id", "a::a", {"name": "a", "file_path": "/a.py", "start_line": 10}) + graphdb.upsert_node("Function", "id", "a::m", {"name": "m", "file_path": "/a.py", "start_line": 20}) + rows = graphdb.find_nodes( + "Function", return_fields=["name"], order_by=["start_line"] + ) + assert [r["name"] for r in rows] == ["a", "m", "z"] + + +class TestFindNodesWithoutIncoming: + def test_function_with_no_callers_returned(self, graphdb): + # foo() is never called + graphdb.upsert_node("Function", "id", "a::foo", {"name": "foo", "file_path": "/a.py", "start_line": 1, "end_line": 2}) + # bar() is called by baz() + graphdb.upsert_node("Function", "id", "a::bar", {"name": "bar", "file_path": "/a.py", "start_line": 4, "end_line": 5}) + graphdb.upsert_node("Function", "id", "a::baz", {"name": "baz", "file_path": "/a.py", "start_line": 8, "end_line": 9}) + graphdb.ensure_edge("CALLS", "a::baz", "a::bar") + + rows = graphdb.find_nodes_without_incoming( + "Function", "CALLS", return_fields=["name"] + ) + names = {r["name"] for r in rows} + assert "foo" in names + # baz is never called either; foo + baz expected. bar has caller -> excluded. + assert "baz" in names + assert "bar" not in names + + def test_exclude_underscore_prefix(self, graphdb): + graphdb.upsert_node("Function", "id", "a::_priv", {"name": "_priv", "file_path": "/a.py", "start_line": 1, "end_line": 2}) + graphdb.upsert_node("Function", "id", "a::pub", {"name": "pub", "file_path": "/a.py", "start_line": 4, "end_line": 5}) + rows = graphdb.find_nodes_without_incoming( + "Function", "CALLS", exclude_name_prefix="_", return_fields=["name"] + ) + names = {r["name"] for r in rows} + assert "pub" in names + assert "_priv" not in names + + class TestFindNeighbors: def setup_data(self, db): db.upsert_node("Function", "id", "a::caller", {"name": "caller", "file_path": "/a.py"})