Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions Makefile.cbm
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ FOUNDATION_SRCS = \
src/foundation/compat_regex.c \
src/foundation/mem.c \
src/foundation/diagnostics.c \
src/foundation/profile.c
src/foundation/profile.c \
src/foundation/dump_verify.c

# Existing extraction C code (compiled from current location)
EXTRACTION_SRCS = \
Expand Down Expand Up @@ -292,7 +293,8 @@ TEST_FOUNDATION_SRCS = \
tests/test_str_intern.c \
tests/test_log.c \
tests/test_str_util.c \
tests/test_platform.c
tests/test_platform.c \
tests/test_dump_verify.c

TEST_EXTRACTION_SRCS = \
tests/test_extraction.c \
Expand All @@ -310,7 +312,8 @@ TEST_STORE_SRCS = \
tests/test_store_arch.c \
tests/test_store_bulk.c \
tests/test_store_pragmas.c \
tests/test_store_checkpoint.c
tests/test_store_checkpoint.c \
tests/test_dump_verify_io.c

TEST_CYPHER_SRCS = \
tests/test_cypher.c
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ codebase-memory-mcp config reset auto_index # reset to default
| `CBM_DOWNLOAD_URL` | *(GitHub releases)* | Override the download URL for updates. Used for testing or self-hosted deployments. |
| `CBM_LOG_LEVEL` | `info` | Set the minimum log level. Accepted values (case-insensitive): `debug`, `info`, `warn`, `error`, `none` — or their numeric equivalents `0`–`4` matching the internal enum. Logs go to stderr; stdout is reserved for MCP JSON-RPC. |
| `CBM_WORKERS` | *(detected)* | Override the parallel-indexing worker count returned by `cbm_default_worker_count`. Useful inside containers where `sysconf(_SC_NPROCESSORS_ONLN)` reports host CPUs rather than the cgroup's effective quota. Range 1–256; invalid values are ignored with a warning. |
| `CBM_DUMP_VERIFY_MIN_RATIO` | `0.5` | After indexing, compare the persisted SQLite node count to the committed in-memory node count. When persisted nodes fall below this fraction of committed nodes (and committed > 50), `index_repository` returns `status:"degraded"` instead of silently reporting `indexed`. Range 0–1; set `0` to disable the check. Invalid values are ignored with a warning. |

```bash
# Store indexes in a custom directory
Expand Down
40 changes: 40 additions & 0 deletions src/foundation/dump_verify.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* dump_verify.c — Post-dump plausibility gate (#334).
*/
#include "foundation/dump_verify.h"
#include "foundation/constants.h"
#include "foundation/log.h"
#include "foundation/platform.h"

#include <stdlib.h>
#include <string.h>

/*
 * Decide whether the persisted node count is implausibly small compared with
 * the committed (in-memory) node count.
 *
 * The gate is skipped (returns false) when any of these hold:
 *   - ratio <= 0            : gate disabled
 *   - committed_nodes < 0   : no committed baseline available
 *   - committed_nodes <= min_floor : repo too small for a ratio to be meaningful
 * Once the gate applies, a negative persisted_nodes (count error) is always
 * degraded; otherwise the result is persisted_nodes < committed_nodes * ratio.
 */
bool cbm_dump_verify_is_degraded(int committed_nodes, int persisted_nodes, double ratio,
                                 int min_floor) {
    const bool gate_skipped =
        (ratio <= 0.0) || (committed_nodes < 0) || (committed_nodes <= min_floor);
    if (gate_skipped) {
        return false;
    }

    /* A failed count cannot be compared; treat it as a shortfall. */
    if (persisted_nodes < 0) {
        return true;
    }

    const double threshold = (double)committed_nodes * ratio;
    const double persisted = (double)persisted_nodes;
    return persisted < threshold;
}

/*
 * Read the plausibility-gate ratio from CBM_DUMP_VERIFY_MIN_RATIO.
 *
 * Returns the parsed value when the variable holds a number in [0, 1],
 * optionally followed by whitespace. When cbm_safe_getenv() yields nothing
 * (presumably: variable unset), CBM_DUMP_VERIFY_DEFAULT_RATIO is returned
 * without logging. A value that is present but invalid -- not a number,
 * followed by trailing garbage such as "0.5x", NaN, or outside [0, 1] --
 * logs a warning and falls back to the default as well.
 */
double cbm_dump_verify_min_ratio(void) {
    char buf[CBM_SZ_32];
    if (cbm_safe_getenv("CBM_DUMP_VERIFY_MIN_RATIO", buf, sizeof(buf), NULL) == NULL) {
        return CBM_DUMP_VERIFY_DEFAULT_RATIO;
    }

    char *end = NULL;
    const double r = strtod(buf, &end);
    if (end != buf) {
        /* strtod stops at the first character it cannot use; anything left
         * other than whitespace means the value was only partially numeric. */
        while (*end == ' ' || *end == '\t' || *end == '\n' || *end == '\r') {
            end++;
        }
        /* NaN fails both range comparisons and is rejected here too. */
        if (*end == '\0' && r >= 0.0 && r <= 1.0) {
            return r;
        }
    }

    cbm_log_warn("dump_verify.env.invalid", "value", buf, "fallback", "0.5");
    return CBM_DUMP_VERIFY_DEFAULT_RATIO;
}
32 changes: 32 additions & 0 deletions src/foundation/dump_verify.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* dump_verify.h — Post-dump plausibility gate (#334 design b).
*
* Compares committed in-memory node counts against persisted SQLite rows
* after index_repository completes. Nodes-only gate (edges shrink legitimately
* at dump when endpoints fail to resolve).
*/
#ifndef CBM_DUMP_VERIFY_H
#define CBM_DUMP_VERIFY_H

#include <stdbool.h>

/**
 * Repos with at most this many committed nodes skip the ratio gate.
 * Callers pass this as the min_floor argument of cbm_dump_verify_is_degraded().
 */
enum { CBM_DUMP_VERIFY_MIN_FLOOR = 50 };

/**
 * Default minimum persisted/committed ratio, returned by
 * cbm_dump_verify_min_ratio() when the env value is unset or invalid.
 */
#define CBM_DUMP_VERIFY_DEFAULT_RATIO 0.5

/**
 * True when persisted_nodes is implausibly below committed_nodes.
 *
 * Conditions are checked in this order:
 *   1. ratio <= 0                    -> false (gate disabled)
 *   2. committed_nodes < 0           -> false (no dump)
 *   3. committed_nodes <= min_floor  -> false (sparse repo)
 *   4. persisted_nodes < 0           -> true  (count error)
 *   5. otherwise                     -> persisted_nodes < committed_nodes * ratio
 *
 * Because of the ordering, a count error is only reported as degraded when
 * the gate is enabled and the committed baseline is above the floor. The
 * final comparison is strict: persisted == committed * ratio is not degraded.
 */
bool cbm_dump_verify_is_degraded(int committed_nodes, int persisted_nodes, double ratio,
int min_floor);

/**
 * Read CBM_DUMP_VERIFY_MIN_RATIO (0..1); invalid/unset -> default 0.5. Set 0 to disable.
 * A value that is present but rejected is reported with a warning log entry.
 */
double cbm_dump_verify_min_ratio(void);

#endif /* CBM_DUMP_VERIFY_H */
84 changes: 70 additions & 14 deletions src/mcp/mcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ enum {
#include "foundation/compat_thread.h"
#include "foundation/log.h"
#include "foundation/str_util.h"
#include "foundation/dump_verify.h"
#include "foundation/compat_regex.h"
#include "pipeline/artifact.h"

Expand Down Expand Up @@ -2525,28 +2526,82 @@ static void add_excluded_summary(yyjson_mut_doc *doc, yyjson_mut_val *root, char
yyjson_mut_obj_add_val(doc, root, "excluded", excluded);
}

/* Build the success portion of the index_repository response. */
static void build_index_success_response(cbm_mcp_server_t *srv, yyjson_mut_doc *doc,
/* Build the success portion of the index_repository response.
* Returns true when status should be "degraded" (#334 plausibility gate). */
static bool build_index_success_response(cbm_mcp_server_t *srv, yyjson_mut_doc *doc,
yyjson_mut_val *root, const char *project_name,
const char *repo_path, bool persistence,
const char *repo_path, bool persistence, cbm_pipeline_t *p,
char **excluded_dirs, int excluded_count) {
add_excluded_summary(doc, root, excluded_dirs, excluded_count);

int exp_nodes = -1;
int exp_edges = -1;
cbm_pipeline_get_committed_counts(p, &exp_nodes, &exp_edges);

const double ratio = cbm_dump_verify_min_ratio();
const int min_floor = CBM_DUMP_VERIFY_MIN_FLOOR;

cbm_store_t *store = resolve_store(srv, project_name);
int nodes = 0;
int edges = 0;
bool degraded = false;

if (!store) {
return;
degraded = true;
} else {
nodes = cbm_store_count_nodes(store, project_name);
edges = cbm_store_count_edges(store, project_name);
if (nodes < 0) {
degraded = true;
nodes = 0;
edges = edges >= 0 ? edges : 0;
} else if (cbm_dump_verify_is_degraded(exp_nodes, nodes, ratio, min_floor)) {
(void)cbm_store_checkpoint(store);
int nodes2 = cbm_store_count_nodes(store, project_name);
int edges2 = cbm_store_count_edges(store, project_name);
if (nodes2 >= 0) {
nodes = nodes2;
}
if (edges2 >= 0) {
edges = edges2;
}
degraded = cbm_dump_verify_is_degraded(exp_nodes, nodes, ratio, min_floor);
}
}
int nodes = cbm_store_count_nodes(store, project_name);
int edges = cbm_store_count_edges(store, project_name);

yyjson_mut_obj_add_int(doc, root, "nodes", nodes);
yyjson_mut_obj_add_int(doc, root, "edges", edges);
if (exp_nodes >= 0) {
yyjson_mut_obj_add_int(doc, root, "expected_nodes", exp_nodes);
yyjson_mut_obj_add_int(doc, root, "expected_edges", exp_edges);
}

if (degraded) {
if (!store) {
yyjson_mut_obj_add_str(doc, root, "hint",
"Index database failed integrity check and was removed. "
"Re-run index_repository(repo_path=...) to rebuild.");
cbm_log_warn("dump.verify", "reason", "store_missing", "expected_nodes",
exp_nodes >= 0 ? "set" : "unknown");
} else {
char exp_buf[MCP_FIELD_SIZE];
char got_buf[MCP_FIELD_SIZE];
snprintf(exp_buf, sizeof(exp_buf), "%d", exp_nodes);
snprintf(got_buf, sizeof(got_buf), "%d", nodes);
yyjson_mut_obj_add_str(
doc, root, "hint",
"Persisted far fewer nodes than indexed — likely durability loss from a "
"hard-killed sibling process. Re-run index_repository(repo_path=...) to rebuild.");
cbm_log_warn("dump.verify", "expected_nodes", exp_buf, "persisted_nodes", got_buf);
}
}

char adr_path[CBM_SZ_4K];
snprintf(adr_path, sizeof(adr_path), "%s/.codebase-memory/adr.md", repo_path);
struct stat adr_st;
bool adr_exists = (stat(adr_path, &adr_st) == 0);
yyjson_mut_obj_add_bool(doc, root, "adr_present", adr_exists);
if (!adr_exists) {
if (!adr_exists && !degraded) {
yyjson_mut_obj_add_str(
doc, root, "adr_hint",
"Project indexed. Consider creating an Architecture Decision Record: "
Expand All @@ -2561,6 +2616,8 @@ static void build_index_success_response(cbm_mcp_server_t *srv, yyjson_mut_doc *
"Persistent artifact written to .codebase-memory/graph.db.zst. "
"Commit this file to share the index with teammates.");
}

return degraded;
}

static char *handle_index_repository(cbm_mcp_server_t *srv, const char *args) {
Expand Down Expand Up @@ -2641,19 +2698,18 @@ static char *handle_index_repository(cbm_mcp_server_t *srv, const char *args) {
yyjson_mut_doc_set_root(doc, root);

yyjson_mut_obj_add_str(doc, root, "project", project_name);
yyjson_mut_obj_add_str(doc, root, "status", rc == 0 ? "indexed" : "error");

if (rc != 0) {
if (rc == 0) {
bool degraded = build_index_success_response(srv, doc, root, project_name, repo_path,
persistence, p, excluded_dirs, excluded_count);
yyjson_mut_obj_add_str(doc, root, "status", degraded ? "degraded" : "indexed");
} else {
yyjson_mut_obj_add_str(doc, root, "status", "error");
yyjson_mut_obj_add_str(doc, root, "hint",
"Pipeline failed. Check repo_path exists and contains source files. "
"Try mode='fast' for a quicker diagnostic run.");
}

if (rc == 0) {
build_index_success_response(srv, doc, root, project_name, repo_path, persistence,
excluded_dirs, excluded_count);
}

char *json = yy_doc_to_str(doc);
yyjson_mut_doc_free(doc);
/* Free the pipeline only after the response doc copied the excluded list. */
Expand Down
17 changes: 17 additions & 0 deletions src/pipeline/pipeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ struct cbm_pipeline {

/* User-defined extension overrides (loaded once per run) */
cbm_userconfig_t *userconfig;

/* Committed graph size at dump time (-1 = dump did not run). #334 gate axis. */
int committed_nodes;
int committed_edges;
};

/* ── Global pkgmap (one active pipeline at a time) ─────────────── */
Expand Down Expand Up @@ -149,6 +153,8 @@ cbm_pipeline_t *cbm_pipeline_new(const char *repo_path, const char *db_path,
p->project_name = cbm_project_name_from_path(repo_path);
p->mode = mode;
p->persistence = false;
p->committed_nodes = -1;
p->committed_edges = -1;
atomic_init(&p->cancelled, 0);

return p;
Expand Down Expand Up @@ -211,6 +217,15 @@ void cbm_pipeline_get_excluded(const cbm_pipeline_t *p, char ***out, int *count)
}
}

/* Report the node/edge counts recorded on the pipeline at dump time.
 * Each output pointer is optional (NULL skips it). A NULL pipeline reports
 * -1 for both counts. */
void cbm_pipeline_get_committed_counts(const cbm_pipeline_t *p, int *nodes, int *edges) {
    int node_total = -1;
    int edge_total = -1;

    if (p != NULL) {
        node_total = p->committed_nodes;
        edge_total = p->committed_edges;
    }

    if (nodes != NULL) {
        *nodes = node_total;
    }
    if (edges != NULL) {
        *edges = edge_total;
    }
}

/* Resolve the DB path for this pipeline. Caller must free(). */
static char *resolve_db_path(const cbm_pipeline_t *p) {
char *path = malloc(CBM_SZ_1K);
Expand Down Expand Up @@ -814,6 +829,8 @@ static int dump_and_persist_hashes(cbm_pipeline_t *p, const cbm_file_info_t *fil
cbm_log_error("pipeline.err", "phase", "dump");
return rc;
}
p->committed_nodes = cbm_gbuf_node_count(p->gbuf);
p->committed_edges = cbm_gbuf_edge_count(p->gbuf);
cbm_log_info("pass.timing", "pass", "dump", "elapsed_ms", itoa_buf((int)elapsed_ms(*t)));
cbm_store_t *hash_store = cbm_store_open_path(db_path);
if (hash_store) {
Expand Down
4 changes: 4 additions & 0 deletions src/pipeline/pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ int cbm_pipeline_get_mode(const cbm_pipeline_t *p);
* to NULL/0 when p is NULL or nothing was excluded. Do not free. */
void cbm_pipeline_get_excluded(const cbm_pipeline_t *p, char ***out, int *count);

/* Committed node/edge counts captured at dump time (-1 when dump did not run).
 * Nodes are the #334 plausibility-gate axis; edges are informational only.
 * Either output pointer may be NULL to skip that value; when p is NULL both
 * counts are reported as -1. */
void cbm_pipeline_get_committed_counts(const cbm_pipeline_t *p, int *nodes, int *edges);

/* ── Index lock (prevents concurrent pipeline runs on same DB) ──── */

/* Try to acquire the global index lock. Returns true if acquired,
Expand Down
89 changes: 89 additions & 0 deletions tests/test_dump_verify.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* test_dump_verify.c — Post-dump plausibility gate (#334).
*
* Pure-function matrix mirrors sast-ai-app checkSilentDegradation cases.
* I/O-level coverage that drives the gate against a real on-disk SQLite store
* lives in test_dump_verify_io.c (store-linked, excluded from test-foundation).
*/
#include "../src/foundation/compat.h"
#include "../src/foundation/dump_verify.h"
#include "test_framework.h"

#include <stdlib.h>

/* committed_nodes < 0 means there is no baseline to compare against: never degraded. */
TEST(dump_verify_no_baseline) {
ASSERT_FALSE(cbm_dump_verify_is_degraded(-1, 500, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* Committed counts at or below the floor are exempt, even with a large shortfall. */
TEST(dump_verify_sparse_at_floor) {
ASSERT_FALSE(cbm_dump_verify_is_degraded(50, 10, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
ASSERT_FALSE(cbm_dump_verify_is_degraded(12, 5, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* 400 persisted of 1000 committed is below the 0.5 ratio: degraded. */
TEST(dump_verify_shortfall_below_ratio) {
ASSERT_TRUE(cbm_dump_verify_is_degraded(1000, 400, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* Boundary: persisted exactly equal to committed * ratio is accepted
 * (the comparison is a strict less-than). */
TEST(dump_verify_just_above_ratio) {
ASSERT_FALSE(cbm_dump_verify_is_degraded(1000, 500, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* Boundary: one node under committed * ratio is flagged. */
TEST(dump_verify_just_below_ratio) {
ASSERT_TRUE(cbm_dump_verify_is_degraded(1000, 499, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* Nothing persisted at all for a non-sparse repo: degraded. */
TEST(dump_verify_zero_persisted) {
ASSERT_TRUE(cbm_dump_verify_is_degraded(1000, 0, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* More persisted nodes than committed is not a shortfall. */
TEST(dump_verify_growth) {
ASSERT_FALSE(cbm_dump_verify_is_degraded(500, 750, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* A negative persisted count (count error) is degraded when the gate applies. */
TEST(dump_verify_count_error) {
ASSERT_TRUE(cbm_dump_verify_is_degraded(1000, -1, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* ratio == 0 disables the gate regardless of the shortfall. */
TEST(dump_verify_ratio_zero_disables) {
ASSERT_FALSE(cbm_dump_verify_is_degraded(1000, 10, 0.0, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* A ratio looser than the 0.5 default tolerates a shortfall the default flags:
 * 400 of 1000 is degraded at 0.5 but acceptable at 0.3. Asserting both sides
 * proves the ratio argument, not the counts, drives the outcome. */
TEST(dump_verify_loosened_ratio) {
    ASSERT_TRUE(cbm_dump_verify_is_degraded(1000, 400, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
    ASSERT_FALSE(cbm_dump_verify_is_degraded(1000, 400, 0.3, CBM_DUMP_VERIFY_MIN_FLOOR));
    PASS();
}

/* A stricter ratio (0.95) flags a 10% loss that the 0.5 default would accept. */
TEST(dump_verify_tightened_ratio) {
ASSERT_TRUE(cbm_dump_verify_is_degraded(1000, 900, 0.95, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* The gate takes node counts only, so an edge shortfall cannot influence it;
 * equal node counts are never degraded. */
TEST(dump_verify_edges_shrank_nodes_ok) {
/* Edges are not gated; this documents nodes-only semantics for integrators. */
ASSERT_FALSE(cbm_dump_verify_is_degraded(200, 200, 0.5, CBM_DUMP_VERIFY_MIN_FLOOR));
PASS();
}

/* Registers every pure-function gate test defined in this file. */
SUITE(dump_verify) {
RUN_TEST(dump_verify_no_baseline);
RUN_TEST(dump_verify_sparse_at_floor);
RUN_TEST(dump_verify_shortfall_below_ratio);
RUN_TEST(dump_verify_just_above_ratio);
RUN_TEST(dump_verify_just_below_ratio);
RUN_TEST(dump_verify_zero_persisted);
RUN_TEST(dump_verify_growth);
RUN_TEST(dump_verify_count_error);
RUN_TEST(dump_verify_ratio_zero_disables);
RUN_TEST(dump_verify_loosened_ratio);
RUN_TEST(dump_verify_tightened_ratio);
RUN_TEST(dump_verify_edges_shrank_nodes_ok);
}
Loading
Loading