Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 20 additions & 26 deletions codegraph/analysis/context_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@
import sqlite3
from dataclasses import dataclass, field

import kuzu

from codegraph.core.utils import rows as _rows
from codegraph.core.fts import fts_search


Expand Down Expand Up @@ -247,7 +244,7 @@ def _keyword_query(task: str, min_len: int = 3) -> str:

def context_for_task(
task: str,
kuzu_conn: kuzu.Connection,
kuzu_conn,
fts_conn: sqlite3.Connection,
max_nodes: int = 15,
) -> TaskContext:
Expand Down Expand Up @@ -297,35 +294,32 @@ def context_for_task(
relevance=min(relevance, 1.0),
)

# Step 3: Expand via graph — find callers/callees
# Step 3: Expand via graph — find callers/callees (up to 3 each)
if r.kind == "function":
callers = _rows(
kuzu_conn.execute(
"MATCH (caller:Function)-[:CALLS]->(fn:Function) WHERE fn.name = $n RETURN caller.name LIMIT 3",
{"n": r.name},
)
)
callers = kuzu_conn.find_neighbors(
"CALLS",
dst_where={"name": r.name},
return_src=["name"],
)[:3]
for c in callers:
node.relationships.append(f"called by {c['caller.name']}")
node.relationships.append(f"called by {c['src_name']}")

callees = _rows(
kuzu_conn.execute(
"MATCH (fn:Function)-[:CALLS]->(callee:Function) WHERE fn.name = $n RETURN callee.name LIMIT 3",
{"n": r.name},
)
)
callees = kuzu_conn.find_neighbors(
"CALLS",
src_where={"name": r.name},
return_dst=["name"],
)[:3]
for c in callees:
node.relationships.append(f"calls {c['callee.name']}")
node.relationships.append(f"calls {c['dst_name']}")

elif r.kind == "class":
parents = _rows(
kuzu_conn.execute(
"MATCH (c:Class)-[:INHERITS]->(p:Class) WHERE c.name = $n RETURN p.name LIMIT 3",
{"n": r.name},
)
)
parents = kuzu_conn.find_neighbors(
"INHERITS",
src_where={"name": r.name},
return_dst=["name"],
)[:3]
for p in parents:
node.relationships.append(f"extends {p['p.name']}")
node.relationships.append(f"extends {p['dst_name']}")

nodes.append(node)

Expand Down
64 changes: 25 additions & 39 deletions codegraph/analysis/dead_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@

from dataclasses import dataclass

import kuzu

from codegraph.core.utils import rows as _rows

# Entry-point names that are never "dead" even without incoming edges
_ENTRY_POINTS = {
"__init__",
Expand Down Expand Up @@ -45,60 +41,50 @@ class DeadSymbol:


def find_dead_code(
conn: kuzu.Connection,
conn,
include_private: bool = False,
file_filter: str | None = None,
) -> list[DeadSymbol]:
"""Find functions and classes with no incoming CALLS / INHERITS edges."""
dead: list[DeadSymbol] = []
contains = {"file_path": file_filter} if file_filter else None

# Dead functions: no CALLS edge pointing to them, not an entry point
where_clauses = ["NOT (fn)<-[:CALLS]-()"]
if not include_private:
where_clauses.append("NOT fn.name STARTS WITH '_'")
if file_filter:
where_clauses.append("fn.file_path CONTAINS $ff")

where = " AND ".join(where_clauses)
params = {"ff": file_filter} if file_filter else {}

r = conn.execute(
f"MATCH (fn:Function) WHERE {where} RETURN fn.name, fn.file_path, fn.start_line, fn.end_line",
params,
)
for row in _rows(r):
name = row["fn.name"]
# Dead functions: no incoming CALLS edge, not an entry point, not private
for row in conn.find_nodes_without_incoming(
"Function",
"CALLS",
contains=contains,
exclude_name_prefix=None if include_private else "_",
return_fields=["name", "file_path", "start_line", "end_line"],
):
name = row["name"]
if name in _ENTRY_POINTS:
continue
dead.append(
DeadSymbol(
kind="function",
name=name,
file_path=row["fn.file_path"],
start_line=row["fn.start_line"],
end_line=row["fn.end_line"],
file_path=row["file_path"],
start_line=row["start_line"],
end_line=row["end_line"],
reason="no callers",
)
)

# Dead classes: no INHERITS edge pointing to them, no HAS_METHOD usage
where_clauses_cls = ["NOT (c)<-[:INHERITS]-()"]
if file_filter:
where_clauses_cls.append("c.file_path CONTAINS $ff")
where_cls = " AND ".join(where_clauses_cls)

r = conn.execute(
f"MATCH (c:Class) WHERE {where_cls} RETURN c.name, c.file_path, c.start_line, c.end_line",
params,
)
for row in _rows(r):
# Dead classes: no incoming INHERITS edge
for row in conn.find_nodes_without_incoming(
"Class",
"INHERITS",
contains=contains,
return_fields=["name", "file_path", "start_line", "end_line"],
):
dead.append(
DeadSymbol(
kind="class",
name=row["c.name"],
file_path=row["c.file_path"],
start_line=row["c.start_line"],
end_line=row["c.end_line"],
name=row["name"],
file_path=row["file_path"],
start_line=row["start_line"],
end_line=row["end_line"],
reason="no subclasses",
)
)
Expand Down
53 changes: 52 additions & 1 deletion codegraph/core/db_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ def find_nodes(
where: dict[str, Any] | None = None,
contains: dict[str, Any] | None = None,
return_fields: list[str] | None = None,
order_by: list[str] | None = None,
limit: int | None = None,
) -> list[dict[str, Any]]:
from codegraph.core.graph_model import NODES
Expand All @@ -288,9 +289,59 @@ def find_nodes(

select_clause = ", ".join(return_fields) if return_fields and return_fields != ["*"] else "*"
where_clause = "WHERE " + " AND ".join(clauses) if clauses else ""
order_clause = "ORDER BY " + ", ".join(order_by) if order_by else ""
limit_clause = f"LIMIT {int(limit)}" if limit else ""
sql = f"SELECT {select_clause} FROM {spec.table} {where_clause} {limit_clause}"
sql = (
f"SELECT {select_clause} FROM {spec.table} "
f"{where_clause} {order_clause} {limit_clause}"
)

cursor = self._conn.execute(sql, params)
cols = [d[0] for d in (cursor.description or [])]
return [dict(zip(cols, row)) for row in cursor.fetchall()]

def find_nodes_without_incoming(
    self,
    label: str,
    edge_type: str,
    contains: dict[str, Any] | None = None,
    exclude_name_prefix: str | None = None,
    return_fields: list[str] | None = None,
) -> list[dict[str, Any]]:
    """Return nodes of ``label`` that have no incoming ``edge_type`` edge.

    Args:
        label: Node label; must be a key of ``graph_model.NODES``.
        edge_type: Edge type; must be a key of ``graph_model.EDGES``.
        contains: Optional ``{field: value}`` filters. Every entry must
            match (AND), and ``value`` is matched as a *literal*
            substring of the field.
        exclude_name_prefix: When set, rows whose ``name`` starts with
            this exact prefix are dropped.
        return_fields: Columns to select. Defaults to the label's key
            field plus ``name``, ``file_path``, ``start_line`` and
            ``end_line``.

    Returns:
        One dict per matching node, keyed by the cursor's column names.

    Raises:
        ValueError: If ``label`` or ``edge_type`` is unknown.
    """
    from codegraph.core.graph_model import EDGES, NODES

    if label not in NODES:
        raise ValueError(f"Unknown node label: {label!r}")
    if edge_type not in EDGES:
        raise ValueError(f"Unknown edge type: {edge_type!r}")
    spec = NODES[label]
    edge = EDGES[edge_type]

    if not return_fields:
        return_fields = [spec.key_field, "name", "file_path", "start_line", "end_line"]

    params: list[Any] = []
    # Anti-join via NOT EXISTS: keep only nodes that never appear as the
    # destination of an edge of this type.
    clauses: list[str] = [
        f"NOT EXISTS (SELECT 1 FROM {edge.table} e WHERE e.{edge.dst_column} = n.{spec.key_field})"
    ]
    if exclude_name_prefix:
        # Plain SUBSTR avoids LIKE wildcard collision (`_` and `%` are
        # LIKE metachars; a caller passing exclude_name_prefix='_' wants
        # the literal underscore).
        clauses.append("substr(n.name, 1, ?) <> ?")
        params.append(len(exclude_name_prefix))
        params.append(exclude_name_prefix)
    if contains:
        for field, value in contains.items():
            # instr() does a literal substring search. LIKE '%value%'
            # would treat `_` / `%` inside the value (e.g. a file filter
            # like "dead_code.py") as wildcards and over-match.
            clauses.append(f"instr(n.{field}, ?) > 0")
            params.append(str(value))

    select_clause = ", ".join(f"n.{f}" for f in return_fields)
    sql = (
        f"SELECT {select_clause} FROM {spec.table} n WHERE {' AND '.join(clauses)}"
    )
    cursor = self._conn.execute(sql, params)
    cols = [d[0] for d in (cursor.description or [])]
    return [dict(zip(cols, row)) for row in cursor.fetchall()]
Expand Down
50 changes: 49 additions & 1 deletion codegraph/core/db_kuzu.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ def find_nodes(
where: dict[str, Any] | None = None,
contains: dict[str, Any] | None = None,
return_fields: list[str] | None = None,
order_by: list[str] | None = None,
limit: int | None = None,
) -> list[dict[str, Any]]:
from codegraph.core.graph_model import NODES
Expand Down Expand Up @@ -248,8 +249,14 @@ def find_nodes(
return_clause = ", ".join(f"n.{f} AS {f}" for f in return_fields)

where_clause = "WHERE " + " AND ".join(clauses) if clauses else ""
order_clause = (
"ORDER BY " + ", ".join(f"n.{f}" for f in order_by) if order_by else ""
)
limit_clause = f"LIMIT {int(limit)}" if limit else ""
cypher = f"MATCH (n:{label}) {where_clause} RETURN {return_clause} {limit_clause}"
cypher = (
f"MATCH (n:{label}) {where_clause} "
f"RETURN {return_clause} {order_clause} {limit_clause}"
)

result = self._inner.execute(cypher, params) if params else self._inner.execute(cypher)
cols = result.get_column_names()
Expand All @@ -260,6 +267,47 @@ def find_nodes(
out.append({c.removeprefix("n."): v for c, v in zip(cols, row)})
return out

def find_nodes_without_incoming(
    self,
    label: str,
    edge_type: str,
    contains: dict[str, Any] | None = None,
    exclude_name_prefix: str | None = None,
    return_fields: list[str] | None = None,
) -> list[dict[str, Any]]:
    """Return ``label`` nodes that are never the target of ``edge_type``.

    Optional filters narrow the result: ``exclude_name_prefix`` drops
    nodes whose name starts with the prefix, and each ``contains`` entry
    requires the given field to contain the given value. All conditions
    are AND-ed. Rows come back as dicts keyed by the requested field
    names; when ``return_fields`` is omitted the label's key field plus
    ``name``, ``file_path``, ``start_line`` and ``end_line`` are returned.

    Raises ``ValueError`` for an unknown label or edge type.
    """
    from codegraph.core.graph_model import EDGES, NODES

    if label not in NODES:
        raise ValueError(f"Unknown node label: {label!r}")
    if edge_type not in EDGES:
        raise ValueError(f"Unknown edge type: {edge_type!r}")

    # The "no incoming edge" predicate is always present; filters follow.
    predicates: list[str] = [f"NOT (n)<-[:{edge_type}]-()"]
    bindings: dict[str, Any] = {}

    if exclude_name_prefix:
        bindings["_pref"] = exclude_name_prefix
        predicates.append("NOT n.name STARTS WITH $_pref")

    for idx, (column, needle) in enumerate((contains or {}).items()):
        key = f"_c{idx}"
        bindings[key] = needle
        predicates.append(f"n.{column} CONTAINS ${key}")

    columns = return_fields or [
        NODES[label].key_field,
        "name",
        "file_path",
        "start_line",
        "end_line",
    ]
    projection = ", ".join(f"n.{col} AS {col}" for col in columns)
    where_expr = " AND ".join(predicates)
    cypher = f"MATCH (n:{label}) WHERE {where_expr} RETURN {projection}"

    # Only hand over a parameter map when there is something to bind.
    if bindings:
        result = self._inner.execute(cypher, bindings)
    else:
        result = self._inner.execute(cypher)

    names = result.get_column_names()
    records: list[dict[str, Any]] = []
    while result.has_next():
        records.append(dict(zip(names, result.get_next())))
    return records

def find_neighbors(
self,
edge_type: str,
Expand Down
22 changes: 22 additions & 0 deletions codegraph/core/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def find_nodes(
where: dict[str, Any] | None = None,
contains: dict[str, Any] | None = None,
return_fields: list[str] | None = None,
order_by: list[str] | None = None,
limit: int | None = None,
) -> list[dict[str, Any]]:
"""Return matching nodes as a list of field-keyed dicts.
Expand All @@ -150,12 +151,33 @@ def find_nodes(
- ``contains``: ``{field: value}`` substring search, OR across fields.
- ``return_fields``: which columns to include. None = all columns
for the label.
- ``order_by``: list of field names to sort ascending by.
- ``limit``: optional row cap.

Used by symbol_lookup, search_symbols, doc search, and similar
"find me nodes matching X" tools.
"""

def find_nodes_without_incoming(
    self,
    label: str,
    edge_type: str,
    contains: dict[str, Any] | None = None,
    exclude_name_prefix: str | None = None,
    return_fields: list[str] | None = None,
) -> list[dict[str, Any]]:
    """Return nodes of ``label`` that have no incoming edge of
    ``edge_type``.

    - ``label`` / ``edge_type``: node label and edge type to inspect.
      The backends in this change raise ``ValueError`` when either is
      not declared in ``graph_model``.
    - ``contains``: optional ``{field: value}`` substring filters; every
      entry must match (AND across fields).
    - ``exclude_name_prefix`` (e.g. ``"_"``): filters out names starting
      with that prefix before the result is returned — used by the
      dead-code detector to skip dunder / underscore methods that
      wouldn't be called from outside.
    - ``return_fields``: which columns to include. When omitted, the
      backends in this change return the label's key field plus
      ``name``, ``file_path``, ``start_line`` and ``end_line``.

    Returns a list of dicts keyed by field name, one per matching node.

    Used by analysis.dead_code to find functions with no callers and
    classes with no subclasses.
    """

def find_neighbors(
self,
edge_type: str,
Expand Down
Loading
Loading