From 152ce07955a41dc809804b2d0c5db5109d3fb408 Mon Sep 17 00:00:00 2001 From: map588 Date: Sat, 4 Apr 2026 19:08:09 -0400 Subject: [PATCH 1/6] fix(cypher): prevent stack buffer overflow in string literal lexer --- src/cypher/cypher.c | 2 ++ tests/test_cypher.c | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/src/cypher/cypher.c b/src/cypher/cypher.c index 6aedeb92..89ff779e 100644 --- a/src/cypher/cypher.c +++ b/src/cypher/cypher.c @@ -92,7 +92,9 @@ static void lex_string_literal(const char *input, int len, int *pos, char quote, int start = *pos; char buf[CBM_SZ_4K]; int blen = 0; + const int max_blen = CBM_SZ_4K - 1; while (*pos < len && input[*pos] != quote) { + if (blen >= max_blen) { (*pos)++; continue; } if (input[*pos] == '\\' && *pos + SKIP_ONE < len) { (*pos)++; switch (input[*pos]) { diff --git a/tests/test_cypher.c b/tests/test_cypher.c index 13527d53..a1694322 100644 --- a/tests/test_cypher.c +++ b/tests/test_cypher.c @@ -78,6 +78,32 @@ TEST(cypher_lex_single_quote_string) { PASS(); } +TEST(cypher_lex_string_overflow) { + /* Build a string literal longer than 4096 bytes to verify we don't + * overflow the stack buffer in lex_string_literal. */ + const int big = 5000; + /* query: "AAAA...A" (quotes included) */ + char *query = malloc(big + 3); /* quote + big chars + quote + NUL */ + ASSERT_NOT_NULL(query); + query[0] = '"'; + memset(query + 1, 'A', big); + query[big + 1] = '"'; + query[big + 2] = '\0'; + + cbm_lex_result_t r = {0}; + int rc = cbm_lex(query, &r); + ASSERT_EQ(rc, 0); + ASSERT_NULL(r.error); + ASSERT_GTE(r.count, 1); + ASSERT_EQ(r.tokens[0].type, TOK_STRING); + /* The string should be truncated to CBM_SZ_4K - 1 (4095) characters. 
*/ + ASSERT_EQ((int)strlen(r.tokens[0].text), 4095); + + cbm_lex_free(&r); + free(query); + PASS(); +} + TEST(cypher_lex_number) { cbm_lex_result_t r = {0}; int rc = cbm_lex("42 3.14", &r); @@ -2064,6 +2090,7 @@ SUITE(cypher) { RUN_TEST(cypher_lex_relationship); RUN_TEST(cypher_lex_string_literal); RUN_TEST(cypher_lex_single_quote_string); + RUN_TEST(cypher_lex_string_overflow); RUN_TEST(cypher_lex_number); RUN_TEST(cypher_lex_operators); RUN_TEST(cypher_lex_keywords_case_insensitive); From 1d39640032592efeeeed0aeda2c6cd47acceded3 Mon Sep 17 00:00:00 2001 From: map588 Date: Sat, 4 Apr 2026 20:17:47 -0400 Subject: [PATCH 2/6] fix(store): add sqlite3_prepare_v2 error checks to prevent NULL stmt crashes Three locations in store.c called sqlite3_prepare_v2 without checking the return code. If the statement fails to prepare (DB corruption, malformed SQL), subsequent bind_text and sqlite3_step calls dereference NULL, crashing the server. Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/agents/c-test-writer.md | 23 +++++++++++++++ .claude/agents/security-reviewer.md | 20 +++++++++++++ .claude/settings.json | 26 +++++++++++++++++ .claude/skills/add-language/SKILL.md | 39 ++++++++++++++++++++++++++ .claude/skills/security-audit/SKILL.md | 23 +++++++++++++++ .claude/worktrees/improvements | 1 + .gitignore | 4 +++ src/cypher/cypher.c | 19 +++++++++++++ src/foundation/compat_thread.c | 12 ++++++++ src/foundation/compat_thread.h | 3 ++ src/main.c | 3 ++ src/store/store.c | 12 ++++++-- src/ui/http_server.c | 14 +++++---- src/ui/http_server.h | 3 ++ 14 files changed, 194 insertions(+), 8 deletions(-) create mode 100644 .claude/agents/c-test-writer.md create mode 100644 .claude/agents/security-reviewer.md create mode 100644 .claude/settings.json create mode 100644 .claude/skills/add-language/SKILL.md create mode 100644 .claude/skills/security-audit/SKILL.md create mode 160000 .claude/worktrees/improvements diff --git a/.claude/agents/c-test-writer.md 
b/.claude/agents/c-test-writer.md new file mode 100644 index 00000000..425f91c5 --- /dev/null +++ b/.claude/agents/c-test-writer.md @@ -0,0 +1,23 @@ +You write C tests for a pure C11 codebase using the custom test framework in `tests/test_framework.h`. + +## Conventions + +- Use the `TEST(name)` macro to define test functions. +- Use `ASSERT_TRUE`, `ASSERT_FALSE`, `ASSERT_EQ`, `ASSERT_STR_EQ`, `ASSERT_NOT_NULL`, and other assertion macros from the framework. +- Each test must be self-contained with proper setup and teardown (especially freeing arenas and closing store handles). +- Tests compile with ASan + UBSan — no memory leaks, no undefined behavior. + +## Patterns to follow + +- **Store tests**: See `tests/test_store_nodes.c`, `tests/test_store_edges.c` — open a temporary in-memory store, perform operations, assert results, close store. +- **Pipeline tests**: See `tests/test_pipeline.c` — write source to a temp file, run pipeline passes, query the resulting graph. +- **Extraction tests**: See `tests/test_extraction.c` — parse source with tree-sitter, verify extracted functions/classes/calls. +- **MCP tests**: See `tests/test_mcp.c` — construct JSON-RPC requests, call handlers, verify JSON responses. +- **Foundation tests**: See `tests/test_arena.c`, `tests/test_hash_table.c` — unit test data structures directly. + +## Build and run + +```bash +scripts/test.sh # Full suite with sanitizers +make -f Makefile.cbm test-foundation # Foundation tests only (fast) +``` diff --git a/.claude/agents/security-reviewer.md b/.claude/agents/security-reviewer.md new file mode 100644 index 00000000..4dcd93f9 --- /dev/null +++ b/.claude/agents/security-reviewer.md @@ -0,0 +1,20 @@ +You are a security reviewer for a pure C11 codebase that implements an MCP server. + +## What to check + +1. **Dangerous calls** — Any new `system()`, `popen()`, `fork()`, `exec*()`, or network calls must be listed in `scripts/security-allowlist.txt`. Flag any that are missing. +2. 
**Buffer safety** — Look for unbounded `strcpy`, `sprintf`, `strcat`, `gets`. All should use bounded variants (`strncpy`, `snprintf`, arena-allocated buffers). +3. **SQL injection** — All queries in `src/store/store.c` must use parameterized statements (`sqlite3_bind_*`). Flag any string-concatenated SQL. +4. **Prompt injection** — MCP tool handlers in `src/mcp/mcp.c` must validate and sanitize all user-provided input before including it in responses or graph queries. +5. **Memory safety** — Check for use-after-free, double-free, null dereference, and uninitialized reads. The project uses arena allocators (`src/foundation/arena.c`) — verify allocations go through arenas where appropriate. +6. **NOLINT usage** — Any `// NOLINT` suppression must be whitelisted in `src/foundation/recursion_whitelist.h`. Flag unwhitelisted suppressions. +7. **Integer overflow** — Check size calculations, especially in allocation paths and buffer length computations. + +## How to verify + +Run the 8-layer security audit: +```bash +make -f Makefile.cbm security +``` + +Review `scripts/security-allowlist.txt` for the current allow-list of dangerous calls. 
diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000..a897c34e --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,26 @@ +{ + "hooks": { + "PostToolUse": [ + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "if echo \"$CLAUDE_FILE_PATH\" | grep -qE '\\.([ch])$'; then clang-format -i \"$CLAUDE_FILE_PATH\"; fi" + } + ] + } + ], + "PreToolUse": [ + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "if echo \"$CLAUDE_FILE_PATH\" | grep -qE '(vendored/|internal/cbm/grammar_)'; then echo 'BLOCKED: Do not edit vendored or generated grammar files' >&2; exit 1; fi" + } + ] + } + ] + } +} diff --git a/.claude/skills/add-language/SKILL.md b/.claude/skills/add-language/SKILL.md new file mode 100644 index 00000000..02e9061f --- /dev/null +++ b/.claude/skills/add-language/SKILL.md @@ -0,0 +1,39 @@ +--- +name: add-language +description: Guide through adding or fixing language support (tree-sitter extraction + pipeline passes) +disable-model-invocation: true +--- + +# Adding Language Support + +Language support has two layers. Determine which type of language you're adding: + +## Standard Languages (need tree-sitter grammar) + +1. **Add grammar** — Vendor the tree-sitter grammar into `internal/cbm/grammar_.c` using `scripts/vendor-grammar.sh` +2. **Configure node types** — Add language entry in `internal/cbm/lang_specs.c` with AST node types for functions, classes, calls, imports +3. **Write extractor** — Create `internal/cbm/extract_.c` for language-specific extraction logic +4. **Add enum** — Add `CBM_LANG_` to `internal/cbm/cbm.h` +5. **Hook into pipeline** — Update `src/pipeline/pipeline.c` for call resolution, usage tracking +6. **Add tests**: + - `tests/test_extraction.c` — AST extraction regression tests + - `tests/test_pipeline.c` — Integration-level pipeline tests + +## Infrastructure Languages (Dockerfile, K8s, etc. 
— no new grammar needed) + +Follow the **infra-pass pattern**: + +1. **Detection helper** — Add `cbm_is__file()` in `src/pipeline/pass_infrascan.c` +2. **Enum value** — Add `CBM_LANG_` in `internal/cbm/cbm.h` and row in `lang_specs.c` +3. **Custom extractor** — Write extractor returning `CBMFileResult*` (reuse YAML grammar if applicable) +4. **Pipeline pass** — Register in `pipeline.c` +5. **Tests** — Follow `TEST(infra_is_dockerfile)` and `TEST(k8s_extract_manifest)` patterns in `tests/test_pipeline.c` + +## Verification + +```bash +scripts/test.sh # Full test suite +scripts/lint.sh # Must pass all linters +``` + +Test against a real open-source repo that uses the language. diff --git a/.claude/skills/security-audit/SKILL.md b/.claude/skills/security-audit/SKILL.md new file mode 100644 index 00000000..c23e7bf6 --- /dev/null +++ b/.claude/skills/security-audit/SKILL.md @@ -0,0 +1,23 @@ +--- +name: security-audit +description: Run the full 8-layer security audit and analyze results +--- + +Run the 8-layer security audit: + +```bash +make -f Makefile.cbm security +``` + +Analyze the output. The 8 layers are: + +1. **Static allow-list audit** — Checks for dangerous calls (`system`, `popen`, `fork`, network) not in `scripts/security-allowlist.txt` +2. **Binary string scan** — Searches compiled binary for suspicious strings +3. **UI audit** — Validates embedded frontend assets +4. **Install audit** — Checks install scripts for unsafe operations +5. **Network egress test** — Verifies no unauthorized network access +6. **MCP robustness (fuzz)** — Sends malformed JSON-RPC to test input handling +7. **Vendored dependency integrity** — Verifies vendored source checksums +8. **Frontend integrity** — Checks graph-ui build artifacts + +For each failure, explain what the layer checks, why it failed, and how to fix it. If a new dangerous call is intentional, guide adding it to `scripts/security-allowlist.txt`. 
diff --git a/.claude/worktrees/improvements b/.claude/worktrees/improvements new file mode 160000 index 00000000..1d30971f --- /dev/null +++ b/.claude/worktrees/improvements @@ -0,0 +1 @@ +Subproject commit 1d30971ff0f7a817e2e60f8c16f604e893a73166 diff --git a/.gitignore b/.gitignore index 7a7666e0..ca040e13 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,10 @@ Thumbs.db # Local project memory (Claude Code auto-memory) memory/ reference/ +.remember/ + +CLAUDE.md +docs/superpowers/ # Build artifacts build/ diff --git a/src/cypher/cypher.c b/src/cypher/cypher.c index 89ff779e..24c445ab 100644 --- a/src/cypher/cypher.c +++ b/src/cypher/cypher.c @@ -471,6 +471,9 @@ static int parse_props(parser_t *p, cbm_prop_filter_t **out, int *count) { int cap = CYP_INIT_CAP4; int n = 0; cbm_prop_filter_t *arr = malloc(cap * sizeof(cbm_prop_filter_t)); + if (!arr) { + return CBM_NOT_FOUND; + } while (!check(p, TOK_RBRACE) && !check(p, TOK_EOF)) { const cbm_token_t *key = expect(p, TOK_IDENT); @@ -571,6 +574,9 @@ static int parse_rel_types(parser_t *p, cbm_rel_pattern_t *out) { int cap = CYP_INIT_CAP4; int n = 0; const char **types = malloc(cap * sizeof(const char *)); + if (!types) { + return CBM_NOT_FOUND; + } const cbm_token_t *t = expect(p, TOK_IDENT); if (!t) { @@ -764,6 +770,12 @@ static cbm_expr_t *parse_in_list(parser_t *p, cbm_condition_t *c) { int vcap = CYP_INIT_CAP8; int vn = 0; const char **vals = malloc(vcap * sizeof(const char *)); + if (!vals) { + free((void *)c->variable); + free((void *)c->property); + free((void *)c->op); + return NULL; + } while (!check(p, TOK_RBRACKET) && !check(p, TOK_EOF)) { if (vn > 0) { match(p, TOK_COMMA); @@ -1063,8 +1075,15 @@ static const char *parse_value_literal(parser_t *p) { static cbm_case_expr_t *parse_case_expr(parser_t *p) { /* CASE already consumed */ cbm_case_expr_t *kase = calloc(CBM_ALLOC_ONE, sizeof(cbm_case_expr_t)); + if (!kase) { + return NULL; + } int bcap = CYP_INIT_CAP4; kase->branches = malloc(bcap * 
sizeof(cbm_case_branch_t)); + if (!kase->branches) { + free(kase); + return NULL; + } while (check(p, TOK_WHEN)) { advance(p); diff --git a/src/foundation/compat_thread.c b/src/foundation/compat_thread.c index e87afb12..163aaa2b 100644 --- a/src/foundation/compat_thread.c +++ b/src/foundation/compat_thread.c @@ -59,6 +59,14 @@ int cbm_thread_join(cbm_thread_t *t) { return 0; } +int cbm_thread_detach(cbm_thread_t *t) { + if (t->handle) { + CloseHandle(t->handle); + t->handle = NULL; + } + return 0; +} + #else /* POSIX */ int cbm_thread_create(cbm_thread_t *t, size_t stack_size, void *(*fn)(void *), void *arg) { @@ -77,6 +85,10 @@ int cbm_thread_join(cbm_thread_t *t) { return pthread_join(t->handle, NULL); } +int cbm_thread_detach(cbm_thread_t *t) { + return pthread_detach(t->handle); +} + #endif /* ── Mutex ────────────────────────────────────────────────────── */ diff --git a/src/foundation/compat_thread.h b/src/foundation/compat_thread.h index 145b68bf..7d561093 100644 --- a/src/foundation/compat_thread.h +++ b/src/foundation/compat_thread.h @@ -39,6 +39,9 @@ int cbm_thread_create(cbm_thread_t *t, size_t stack_size, void *(*fn)(void *), v /* Wait for thread to finish. Returns 0 on success. */ int cbm_thread_join(cbm_thread_t *t); +/* Detach thread so resources are freed on exit. Returns 0 on success. 
*/ +int cbm_thread_detach(cbm_thread_t *t); + /* ── Mutex ────────────────────────────────────────────────────── */ #ifdef _WIN32 diff --git a/src/main.c b/src/main.c index 9a79d05e..9f8f187f 100644 --- a/src/main.c +++ b/src/main.c @@ -307,6 +307,9 @@ int main(int argc, char **argv) { } /* Create and start watcher in background thread */ + /* Initialize log mutex before any threads are created */ + cbm_ui_log_init(); + cbm_store_t *watch_store = cbm_store_open_memory(); g_watcher = cbm_watcher_new(watch_store, watcher_index_fn, NULL); diff --git a/src/store/store.c b/src/store/store.c index 4920732e..4d5db451 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -2552,7 +2552,9 @@ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t const char *sql = "SELECT label, COUNT(*) FROM nodes WHERE project = ?1 GROUP BY label " "ORDER BY COUNT(*) DESC;"; sqlite3_stmt *stmt = NULL; - sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL); + if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) { + return CBM_NOT_FOUND; + } bind_text(stmt, SKIP_ONE, project); int cap = ST_INIT_CAP_8; @@ -2577,7 +2579,9 @@ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t const char *sql = "SELECT type, COUNT(*) FROM edges WHERE project = ?1 GROUP BY type ORDER " "BY COUNT(*) DESC;"; sqlite3_stmt *stmt = NULL; - sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL); + if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) { + return CBM_NOT_FOUND; + } bind_text(stmt, SKIP_ONE, project); int cap = ST_INIT_CAP_8; @@ -3283,7 +3287,9 @@ static bool pkg_in_list(const char *pkg, char **list, int count) { static int collect_pkg_names(cbm_store_t *s, const char *sql, const char *project, char **pkgs, int max_pkgs) { sqlite3_stmt *stmt = NULL; - sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL); + if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) 
!= SQLITE_OK || !stmt) { + return 0; + } bind_text(stmt, SKIP_ONE, project); int count = 0; while (sqlite3_step(stmt) == SQLITE_ROW && count < max_pkgs) { diff --git a/src/ui/http_server.c b/src/ui/http_server.c index 053f317b..f5af47fa 100644 --- a/src/ui/http_server.c +++ b/src/ui/http_server.c @@ -142,14 +142,17 @@ static int g_log_count = 0; static cbm_mutex_t g_log_mutex; static atomic_int g_log_mutex_init = 0; +/* Must be called once before any threads are created. */ +void cbm_ui_log_init(void) { + if (!atomic_exchange(&g_log_mutex_init, 1)) { + cbm_mutex_init(&g_log_mutex); + } +} + /* Called from a log hook — appends a line to the ring buffer (thread-safe) */ void cbm_ui_log_append(const char *line) { - if (!line) + if (!line || !atomic_load(&g_log_mutex_init)) return; - if (!atomic_load(&g_log_mutex_init)) { - cbm_mutex_init(&g_log_mutex); - atomic_store(&g_log_mutex_init, 1); - } cbm_mutex_lock(&g_log_mutex); snprintf(g_log_ring[g_log_head], LOG_LINE_MAX, "%s", line); g_log_head = (g_log_head + 1) % LOG_RING_SIZE; @@ -791,6 +794,7 @@ static void handle_index_start(struct mg_connection *c, struct mg_http_message * mg_http_reply(c, 500, g_cors_json, "{\"error\":\"thread creation failed\"}"); return; } + cbm_thread_detach(&tid); /* Don't leak thread handle */ mg_http_reply(c, 202, g_cors_json, "{\"status\":\"indexing\",\"slot\":%d,\"path\":\"%s\"}", slot, job->root_path); diff --git a/src/ui/http_server.h b/src/ui/http_server.h index 4858a049..4a63a0f5 100644 --- a/src/ui/http_server.h +++ b/src/ui/http_server.h @@ -32,6 +32,9 @@ void cbm_http_server_run(cbm_http_server_t *srv); /* Check if the server started successfully (listener bound). */ bool cbm_http_server_is_running(const cbm_http_server_t *srv); +/* Initialize the log ring buffer mutex. Must be called once before any threads. */ +void cbm_ui_log_init(void); + /* Append a log line to the UI ring buffer (called from log hook). 
*/ void cbm_ui_log_append(const char *line); From fd360458381457db883931f825653547f63e7f83 Mon Sep 17 00:00:00 2001 From: map588 Date: Sat, 4 Apr 2026 20:20:16 -0400 Subject: [PATCH 3/6] fix(watcher): add mutex to protect projects hash table from concurrent access The watcher's projects hash table was written by the main thread (watch/unwatch) and iterated by the watcher thread (poll_once) with no synchronization. Added cbm_mutex_t to the watcher struct and wrapped all hash table operations. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/watcher/watcher.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/watcher/watcher.c b/src/watcher/watcher.c index 8bef36e9..5f3ec760 100644 --- a/src/watcher/watcher.c +++ b/src/watcher/watcher.c @@ -20,6 +20,7 @@ #include "foundation/log.h" #include "foundation/hash_table.h" #include "foundation/compat.h" +#include "foundation/compat_thread.h" #include "foundation/compat_fs.h" #include "foundation/str_util.h" @@ -50,6 +51,7 @@ struct cbm_watcher { cbm_index_fn index_fn; void *user_data; CBMHashTable *projects; /* name → project_state_t* */ + cbm_mutex_t projects_lock; atomic_int stopped; }; @@ -236,6 +238,7 @@ cbm_watcher_t *cbm_watcher_new(cbm_store_t *store, cbm_index_fn index_fn, void * w->index_fn = index_fn; w->user_data = user_data; w->projects = cbm_ht_create(CBM_SZ_32); + cbm_mutex_init(&w->projects_lock); atomic_init(&w->stopped, 0); return w; } @@ -244,8 +247,11 @@ void cbm_watcher_free(cbm_watcher_t *w) { if (!w) { return; } + cbm_mutex_lock(&w->projects_lock); cbm_ht_foreach(w->projects, free_state_entry, NULL); cbm_ht_free(w->projects); + cbm_mutex_unlock(&w->projects_lock); + cbm_mutex_destroy(&w->projects_lock); free(w); } @@ -264,6 +270,7 @@ void cbm_watcher_watch(cbm_watcher_t *w, const char *project_name, const char *r } /* Remove old entry first (key points to state's project_name) */ + cbm_mutex_lock(&w->projects_lock); project_state_t *old = cbm_ht_get(w->projects, project_name); if 
(old) { cbm_ht_delete(w->projects, project_name); @@ -272,6 +279,7 @@ void cbm_watcher_watch(cbm_watcher_t *w, const char *project_name, const char *r project_state_t *s = state_new(project_name, root_path); cbm_ht_set(w->projects, s->project_name, s); + cbm_mutex_unlock(&w->projects_lock); cbm_log_info("watcher.watch", "project", project_name, "path", root_path); } @@ -279,10 +287,14 @@ void cbm_watcher_unwatch(cbm_watcher_t *w, const char *project_name) { if (!w || !project_name) { return; } + cbm_mutex_lock(&w->projects_lock); project_state_t *s = cbm_ht_get(w->projects, project_name); if (s) { cbm_ht_delete(w->projects, project_name); state_free(s); + } + cbm_mutex_unlock(&w->projects_lock); + if (s) { cbm_log_info("watcher.unwatch", "project", project_name); } } @@ -421,7 +433,9 @@ int cbm_watcher_poll_once(cbm_watcher_t *w) { .now = now_ns(), .reindexed = 0, }; + cbm_mutex_lock(&w->projects_lock); cbm_ht_foreach(w->projects, poll_project, &ctx); + cbm_mutex_unlock(&w->projects_lock); return ctx.reindexed; } From ac8fde696c4f58cf1beda0829f200283eaad041d Mon Sep 17 00:00:00 2001 From: map588 Date: Sat, 4 Apr 2026 20:20:24 -0400 Subject: [PATCH 4/6] feat(pipeline): emit CALLS edges for decorator applications Decorators previously only created DECORATES edges. A @login_required decorator was invisible to "find all references" queries because those look for CALLS and USAGE edges. Now resolve_decorator emits both DECORATES and CALLS edges. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/pipeline/pass_semantic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pipeline/pass_semantic.c b/src/pipeline/pass_semantic.c index ef327801..253070d9 100644 --- a/src/pipeline/pass_semantic.c +++ b/src/pipeline/pass_semantic.c @@ -321,6 +321,9 @@ static void resolve_decorator(cbm_pipeline_ctx_t *ctx, const cbm_gbuf_node_t *no char props[CBM_SZ_256]; snprintf(props, sizeof(props), "{\"decorator\":\"%s\"}", decorator); cbm_gbuf_insert_edge(ctx->gbuf, node->id, dec->id, "DECORATES", props); + /* Also emit CALLS edge so decorator appears in "find all references" queries */ + cbm_gbuf_insert_edge(ctx->gbuf, node->id, dec->id, "CALLS", + "{\"kind\":\"decorator\"}"); (*count)++; } } From f6366bc278b28c9cbb3312aa9a5452123c802c21 Mon Sep 17 00:00:00 2001 From: map588 Date: Sun, 5 Apr 2026 16:12:18 -0400 Subject: [PATCH 5/6] feat(foundation): add safe_free, safe_str_free, safe_buf_free, safe_grow memory helpers Four new inline helpers in platform.h alongside existing safe_realloc: - safe_free(ptr): frees and NULLs any pointer (prevents double-free) - safe_str_free(&str): frees const char* and NULLs (replaces free((void*)str)) - safe_buf_free(buf, &count): frees array and zeros its count - safe_grow(arr, n, cap, factor): one-line capacity-doubling realloc --- src/foundation/platform.h | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/foundation/platform.h b/src/foundation/platform.h index 5624810c..f0665d5c 100644 --- a/src/foundation/platform.h +++ b/src/foundation/platform.h @@ -31,6 +31,48 @@ static inline void *safe_realloc(void *ptr, size_t size) { return tmp; } +/* Safe free: frees and NULLs a pointer to prevent double-free / use-after-free. + * Accepts void** so it works with any pointer type via the macro. 
*/ +static inline void safe_free_impl(void **pp) { + if (pp && *pp) { + free(*pp); + *pp = NULL; + } +} +#define safe_free(ptr) safe_free_impl((void **)(void *)&(ptr)) + +/* Safe string free: frees a const char* and NULLs it. + * Casts away const so callers don't need the (void*) dance. */ +static inline void safe_str_free(const char **sp) { + if (sp && *sp) { + free((void *)*sp); + *sp = NULL; + } +} + +/* Safe buffer free: frees a heap array and zeros its element count. + * Use for dynamic arrays paired with a size_t count. */ +static inline void safe_buf_free_impl(void **buf, size_t *count) { + if (buf && *buf) { + free(*buf); + *buf = NULL; + } + if (count) { + *count = 0; + } +} +#define safe_buf_free(buf, countp) safe_buf_free_impl((void **)(void *)&(buf), (countp)) + +/* Safe grow: multiplies capacity by factor and reallocs when count reaches cap. + * Usage: safe_grow(arr, count, cap, growth_factor) + * Statement macro: yields no value; arr is reassigned in place (NULL on OOM — old memory freed by safe_realloc). */ +#define safe_grow(arr, n, cap, factor) do { \ + if ((size_t)(n) >= (size_t)(cap)) { \ + (cap) *= (factor); \ + (arr) = safe_realloc((arr), (size_t)(cap) * sizeof(*(arr))); \ + } \ +} while (0) + /* ── Memory mapping ────────────────────────────────────────────── */ /* Map a file read-only into memory. Returns NULL on error. 
From d229cb6f16ccd9122f72c1a2a5e380ac5fd76cb6 Mon Sep 17 00:00:00 2001 From: map588 Date: Mon, 6 Apr 2026 10:31:54 -0400 Subject: [PATCH 6/6] removed local cruft --- .claude/agents/c-test-writer.md | 23 --------------- .claude/agents/security-reviewer.md | 20 ------------- .claude/settings.json | 26 ----------------- .claude/skills/add-language/SKILL.md | 39 -------------------------- .claude/skills/security-audit/SKILL.md | 23 --------------- .claude/worktrees/improvements | 1 - 6 files changed, 132 deletions(-) delete mode 100644 .claude/agents/c-test-writer.md delete mode 100644 .claude/agents/security-reviewer.md delete mode 100644 .claude/settings.json delete mode 100644 .claude/skills/add-language/SKILL.md delete mode 100644 .claude/skills/security-audit/SKILL.md delete mode 160000 .claude/worktrees/improvements diff --git a/.claude/agents/c-test-writer.md b/.claude/agents/c-test-writer.md deleted file mode 100644 index 425f91c5..00000000 --- a/.claude/agents/c-test-writer.md +++ /dev/null @@ -1,23 +0,0 @@ -You write C tests for a pure C11 codebase using the custom test framework in `tests/test_framework.h`. - -## Conventions - -- Use the `TEST(name)` macro to define test functions. -- Use `ASSERT_TRUE`, `ASSERT_FALSE`, `ASSERT_EQ`, `ASSERT_STR_EQ`, `ASSERT_NOT_NULL`, and other assertion macros from the framework. -- Each test must be self-contained with proper setup and teardown (especially freeing arenas and closing store handles). -- Tests compile with ASan + UBSan — no memory leaks, no undefined behavior. - -## Patterns to follow - -- **Store tests**: See `tests/test_store_nodes.c`, `tests/test_store_edges.c` — open a temporary in-memory store, perform operations, assert results, close store. -- **Pipeline tests**: See `tests/test_pipeline.c` — write source to a temp file, run pipeline passes, query the resulting graph. -- **Extraction tests**: See `tests/test_extraction.c` — parse source with tree-sitter, verify extracted functions/classes/calls. 
-- **MCP tests**: See `tests/test_mcp.c` — construct JSON-RPC requests, call handlers, verify JSON responses. -- **Foundation tests**: See `tests/test_arena.c`, `tests/test_hash_table.c` — unit test data structures directly. - -## Build and run - -```bash -scripts/test.sh # Full suite with sanitizers -make -f Makefile.cbm test-foundation # Foundation tests only (fast) -``` diff --git a/.claude/agents/security-reviewer.md b/.claude/agents/security-reviewer.md deleted file mode 100644 index 4dcd93f9..00000000 --- a/.claude/agents/security-reviewer.md +++ /dev/null @@ -1,20 +0,0 @@ -You are a security reviewer for a pure C11 codebase that implements an MCP server. - -## What to check - -1. **Dangerous calls** — Any new `system()`, `popen()`, `fork()`, `exec*()`, or network calls must be listed in `scripts/security-allowlist.txt`. Flag any that are missing. -2. **Buffer safety** — Look for unbounded `strcpy`, `sprintf`, `strcat`, `gets`. All should use bounded variants (`strncpy`, `snprintf`, arena-allocated buffers). -3. **SQL injection** — All queries in `src/store/store.c` must use parameterized statements (`sqlite3_bind_*`). Flag any string-concatenated SQL. -4. **Prompt injection** — MCP tool handlers in `src/mcp/mcp.c` must validate and sanitize all user-provided input before including it in responses or graph queries. -5. **Memory safety** — Check for use-after-free, double-free, null dereference, and uninitialized reads. The project uses arena allocators (`src/foundation/arena.c`) — verify allocations go through arenas where appropriate. -6. **NOLINT usage** — Any `// NOLINT` suppression must be whitelisted in `src/foundation/recursion_whitelist.h`. Flag unwhitelisted suppressions. -7. **Integer overflow** — Check size calculations, especially in allocation paths and buffer length computations. 
- -## How to verify - -Run the 8-layer security audit: -```bash -make -f Makefile.cbm security -``` - -Review `scripts/security-allowlist.txt` for the current allow-list of dangerous calls. diff --git a/.claude/settings.json b/.claude/settings.json deleted file mode 100644 index a897c34e..00000000 --- a/.claude/settings.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "hooks": { - "PostToolUse": [ - { - "matcher": "Edit|Write", - "hooks": [ - { - "type": "command", - "command": "if echo \"$CLAUDE_FILE_PATH\" | grep -qE '\\.([ch])$'; then clang-format -i \"$CLAUDE_FILE_PATH\"; fi" - } - ] - } - ], - "PreToolUse": [ - { - "matcher": "Edit|Write", - "hooks": [ - { - "type": "command", - "command": "if echo \"$CLAUDE_FILE_PATH\" | grep -qE '(vendored/|internal/cbm/grammar_)'; then echo 'BLOCKED: Do not edit vendored or generated grammar files' >&2; exit 1; fi" - } - ] - } - ] - } -} diff --git a/.claude/skills/add-language/SKILL.md b/.claude/skills/add-language/SKILL.md deleted file mode 100644 index 02e9061f..00000000 --- a/.claude/skills/add-language/SKILL.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -name: add-language -description: Guide through adding or fixing language support (tree-sitter extraction + pipeline passes) -disable-model-invocation: true ---- - -# Adding Language Support - -Language support has two layers. Determine which type of language you're adding: - -## Standard Languages (need tree-sitter grammar) - -1. **Add grammar** — Vendor the tree-sitter grammar into `internal/cbm/grammar_.c` using `scripts/vendor-grammar.sh` -2. **Configure node types** — Add language entry in `internal/cbm/lang_specs.c` with AST node types for functions, classes, calls, imports -3. **Write extractor** — Create `internal/cbm/extract_.c` for language-specific extraction logic -4. **Add enum** — Add `CBM_LANG_` to `internal/cbm/cbm.h` -5. **Hook into pipeline** — Update `src/pipeline/pipeline.c` for call resolution, usage tracking -6. 
**Add tests**: - - `tests/test_extraction.c` — AST extraction regression tests - - `tests/test_pipeline.c` — Integration-level pipeline tests - -## Infrastructure Languages (Dockerfile, K8s, etc. — no new grammar needed) - -Follow the **infra-pass pattern**: - -1. **Detection helper** — Add `cbm_is__file()` in `src/pipeline/pass_infrascan.c` -2. **Enum value** — Add `CBM_LANG_` in `internal/cbm/cbm.h` and row in `lang_specs.c` -3. **Custom extractor** — Write extractor returning `CBMFileResult*` (reuse YAML grammar if applicable) -4. **Pipeline pass** — Register in `pipeline.c` -5. **Tests** — Follow `TEST(infra_is_dockerfile)` and `TEST(k8s_extract_manifest)` patterns in `tests/test_pipeline.c` - -## Verification - -```bash -scripts/test.sh # Full test suite -scripts/lint.sh # Must pass all linters -``` - -Test against a real open-source repo that uses the language. diff --git a/.claude/skills/security-audit/SKILL.md b/.claude/skills/security-audit/SKILL.md deleted file mode 100644 index c23e7bf6..00000000 --- a/.claude/skills/security-audit/SKILL.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -name: security-audit -description: Run the full 8-layer security audit and analyze results ---- - -Run the 8-layer security audit: - -```bash -make -f Makefile.cbm security -``` - -Analyze the output. The 8 layers are: - -1. **Static allow-list audit** — Checks for dangerous calls (`system`, `popen`, `fork`, network) not in `scripts/security-allowlist.txt` -2. **Binary string scan** — Searches compiled binary for suspicious strings -3. **UI audit** — Validates embedded frontend assets -4. **Install audit** — Checks install scripts for unsafe operations -5. **Network egress test** — Verifies no unauthorized network access -6. **MCP robustness (fuzz)** — Sends malformed JSON-RPC to test input handling -7. **Vendored dependency integrity** — Verifies vendored source checksums -8. 
**Frontend integrity** — Checks graph-ui build artifacts - -For each failure, explain what the layer checks, why it failed, and how to fix it. If a new dangerous call is intentional, guide adding it to `scripts/security-allowlist.txt`. diff --git a/.claude/worktrees/improvements b/.claude/worktrees/improvements deleted file mode 160000 index 1d30971f..00000000 --- a/.claude/worktrees/improvements +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1d30971ff0f7a817e2e60f8c16f604e893a73166