diff --git a/Makefile.cbm b/Makefile.cbm index 3ff50b81..715273cd 100644 --- a/Makefile.cbm +++ b/Makefile.cbm @@ -200,6 +200,7 @@ PIPELINE_SRCS = \ src/pipeline/pass_semantic_edges.c \ src/pipeline/pass_complexity.c \ src/pipeline/pass_cross_repo.c \ + src/pipeline/pass_cross_repo_maven.c \ src/pipeline/artifact.c \ src/pipeline/pass_pkgmap.c diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index d3aa3bf3..6701a6a1 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -273,7 +273,8 @@ static const tool_def_t TOOLS[] = { {"index_repository", "Index a repository into the knowledge graph. " "Special mode 'cross-repo-intelligence': skip extraction, only match Routes/Channels " - "across projects to create CROSS_HTTP_CALLS/CROSS_ASYNC_CALLS/CROSS_CHANNEL edges. " + "and Maven library dependencies across projects to create CROSS_HTTP_CALLS/" + "CROSS_ASYNC_CALLS/CROSS_CHANNEL/CROSS_LIBRARY_DEPENDS_ON/CROSS_LIBRARY_USED_BY edges. " "Requires target_projects param. Ensure target projects have fresh indexes first.", "{\"type\":\"object\",\"properties\":{\"repo_path\":{\"type\":\"string\",\"description\":" "\"Path to the repository\"}," @@ -282,7 +283,8 @@ static const tool_def_t TOOLS[] = { "\"default\":\"full\",\"description\":\"All modes run type-aware LSP call/usage " "resolution (per-file + cross-file). full: all files + similarity/semantic edges. " "moderate: filtered files + similarity/semantic. fast: filtered files, no " - "similarity/semantic. cross-repo-intelligence: match Routes/Channels across projects.\"}," + "similarity/semantic. cross-repo-intelligence: match Routes/Channels and Maven library " + "dependencies across projects.\"}," "\"target_projects\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}," "\"description\":\"Projects to search for cross-repo links (cross-repo-intelligence mode). " "Use [\\\"*\\\"] for all indexed projects. 
Run list_projects to see available projects.\"}," @@ -1813,9 +1815,10 @@ static void append_cross_repo_summary(yyjson_mut_doc *doc, yyjson_mut_val *root, /* Scan edge types for any CROSS_* edges and sum them */ int cross_total = 0; yyjson_mut_val *cr = yyjson_mut_obj(doc); - static const char *cross_types[] = {"CROSS_HTTP_CALLS", "CROSS_ASYNC_CALLS", - "CROSS_CHANNEL", "CROSS_GRPC_CALLS", - "CROSS_GRAPHQL_CALLS", "CROSS_TRPC_CALLS"}; + static const char *cross_types[] = { + "CROSS_HTTP_CALLS", "CROSS_ASYNC_CALLS", "CROSS_CHANNEL", + "CROSS_GRPC_CALLS", "CROSS_GRAPHQL_CALLS", "CROSS_TRPC_CALLS", + "CROSS_LIBRARY_DEPENDS_ON", "CROSS_LIBRARY_USED_BY"}; for (int t = 0; t < (int)(sizeof(cross_types) / sizeof(cross_types[0])); t++) { for (int i = 0; i < schema->edge_type_count; i++) { if (strcmp(schema->edge_types[i].type, cross_types[t]) == 0) { @@ -2462,7 +2465,7 @@ static char *handle_cross_repo_mode(const char *repo_path, const char *args) { yyjson_doc_free(jdoc); int total = result.http_edges + result.async_edges + result.channel_edges + result.grpc_edges + - result.graphql_edges + result.trpc_edges; + result.graphql_edges + result.trpc_edges + result.library_edges; yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); yyjson_mut_val *root = yyjson_mut_obj(doc); yyjson_mut_doc_set_root(doc, root); @@ -2476,6 +2479,7 @@ static char *handle_cross_repo_mode(const char *repo_path, const char *args) { yyjson_mut_obj_add_int(doc, root, "cross_grpc_calls", result.grpc_edges); yyjson_mut_obj_add_int(doc, root, "cross_graphql_calls", result.graphql_edges); yyjson_mut_obj_add_int(doc, root, "cross_trpc_calls", result.trpc_edges); + yyjson_mut_obj_add_int(doc, root, "cross_library_edges", result.library_edges); yyjson_mut_obj_add_int(doc, root, "total_cross_edges", total); yyjson_mut_obj_add_real(doc, root, "elapsed_ms", result.elapsed_ms); diff --git a/src/pipeline/pass_cross_repo.c b/src/pipeline/pass_cross_repo.c index 07f5ca7e..46a69f5b 100644 --- 
a/src/pipeline/pass_cross_repo.c +++ b/src/pipeline/pass_cross_repo.c @@ -10,6 +10,7 @@ * get a CROSS_* edge so the link is visible from either side. */ #include "pipeline/pass_cross_repo.h" +#include "pipeline/pass_cross_repo_maven.h" #include "foundation/constants.h" #include "foundation/log.h" #include "foundation/platform.h" @@ -46,6 +47,25 @@ static const char *cr_itoa(int v) { return cr_ibuf; } +bool cbm_cross_repo_project_list_alloc(char ***out, int cap, void *(*malloc_fn)(size_t)) { + if (!out) { + return false; + } + *out = NULL; + if (cap <= 0 || !malloc_fn) { + return false; + } + if ((size_t)cap > SIZE_MAX / sizeof(char *)) { + return false; + } + char **items = malloc_fn((size_t)cap * sizeof(*items)); + if (!items) { + return false; + } + *out = items; + return true; +} + /* ── Helpers ─────────────────────────────────────────────────────── */ static const char *cr_cache_dir(void) { @@ -112,6 +132,20 @@ static void delete_cross_edges(cbm_store_t *store, const char *project) { cbm_store_delete_edges_by_type(store, project, "CROSS_GRPC_CALLS"); cbm_store_delete_edges_by_type(store, project, "CROSS_GRAPHQL_CALLS"); cbm_store_delete_edges_by_type(store, project, "CROSS_TRPC_CALLS"); + cbm_store_delete_edges_by_type(store, project, "CROSS_LIBRARY_DEPENDS_ON"); + struct sqlite3 *db = cbm_store_get_db(store); + if (!db) { + return; + } + sqlite3_stmt *st = NULL; + if (sqlite3_prepare_v2(db, + "DELETE FROM nodes WHERE project=?1 AND label='Library' " + "AND qualified_name GLOB '__library__*'", + CBM_NOT_FOUND, &st, NULL) == SQLITE_OK) { + sqlite3_bind_text(st, SKIP_ONE, project, CBM_NOT_FOUND, SQLITE_STATIC); + sqlite3_step(st); + sqlite3_finalize(st); + } } /* Insert a CROSS_* edge into a store. 
*/ @@ -561,7 +595,12 @@ static int collect_all_projects(char ***out) { int cap = CR_INIT_CAP; int count = 0; - char **projects = malloc((size_t)cap * sizeof(char *)); + char **projects = NULL; + if (!cbm_cross_repo_project_list_alloc(&projects, cap, malloc)) { + cbm_closedir(d); + *out = NULL; + return 0; + } cbm_dirent_t *ent; while ((ent = cbm_readdir(d)) != NULL) { @@ -576,15 +615,25 @@ static int collect_all_projects(char ***out) { continue; } if (count >= cap) { - cap *= PAIR_LEN; - char **tmp = realloc(projects, (size_t)cap * sizeof(char *)); + if (cap > INT32_MAX / PAIR_LEN) { + break; + } + int new_cap = cap * PAIR_LEN; + if ((size_t)new_cap > SIZE_MAX / sizeof(*projects)) { + break; + } + char **tmp = realloc(projects, (size_t)new_cap * sizeof(*projects)); if (!tmp) { break; } + cap = new_cap; projects = tmp; } /* Strip .db extension */ projects[count] = malloc(len - PAIR_LEN); + if (!projects[count]) { + break; + } memcpy(projects[count], ent->name, len - CR_DB_EXT_LEN); projects[count][len - CR_DB_EXT_LEN] = '\0'; count++; @@ -660,6 +709,8 @@ cbm_cross_repo_result_t cbm_cross_repo_match(const char *project, const char **t "operation", "CROSS_GRAPHQL_CALLS"); result.trpc_edges += match_typed_routes(src_store, project, tgt_store, tgt, "TRPC_CALLS", "procedure", "procedure", "CROSS_TRPC_CALLS"); + result.library_edges += + cbm_cross_repo_match_maven_libraries(src_store, project, tgt_store, tgt); result.projects_scanned++; cbm_store_close(tgt_store); @@ -677,7 +728,7 @@ cbm_cross_repo_result_t cbm_cross_repo_match(const char *project, const char **t ((double)(t1.tv_nsec - t0.tv_nsec) / CR_NS_PER_MS); int total = result.http_edges + result.async_edges + result.channel_edges + result.grpc_edges + - result.graphql_edges + result.trpc_edges; + result.graphql_edges + result.trpc_edges + result.library_edges; cbm_log_info("cross_repo.done", "project", project, "total", cr_itoa(total)); return result; diff --git a/src/pipeline/pass_cross_repo.h 
b/src/pipeline/pass_cross_repo.h index 5d2d4cfe..87111e84 100644 --- a/src/pipeline/pass_cross_repo.h +++ b/src/pipeline/pass_cross_repo.h @@ -15,6 +15,7 @@ typedef struct { int grpc_edges; /* CROSS_GRPC_CALLS edges created */ int graphql_edges; /* CROSS_GRAPHQL_CALLS edges created */ int trpc_edges; /* CROSS_TRPC_CALLS edges created */ + int library_edges; /* CROSS_LIBRARY_DEPENDS_ON/CROSS_LIBRARY_USED_BY pairs created */ int projects_scanned; double elapsed_ms; } cbm_cross_repo_result_t; diff --git a/src/pipeline/pass_cross_repo_maven.c b/src/pipeline/pass_cross_repo_maven.c new file mode 100644 index 00000000..8681309f --- /dev/null +++ b/src/pipeline/pass_cross_repo_maven.c @@ -0,0 +1,812 @@ +/* + * pass_cross_repo_maven.c - Cross-repo Maven artifact dependency matching. + */ +#include "pipeline/pass_cross_repo_maven.h" +#include "foundation/constants.h" + +#include +#include +#include +#include +#include +#include + +enum { + MCR_MAX_EDGES = 4096, + MCR_MAX_POM_SIZE = 2 * 1024 * 1024, + MCR_INIT_CAP = 32, +}; + +typedef struct { + char *group_id; + char *artifact_id; + char *pom_path; +} mcr_artifact_t; + +typedef struct { + char *group_id; + char *artifact_id; + char *version; + char *scope; + char *pom_path; +} mcr_dependency_t; + +static char *dup_cstr(const char *s) { + const char *value = s ? 
s : ""; + char *copy = malloc(strlen(value) + SKIP_ONE); + if (copy) { + strcpy(copy, value); + } + return copy; +} + +static char *join_root_path(const char *root, const char *rel) { + if (!root || !rel) { + return NULL; + } + size_t root_len = strlen(root); + size_t rel_len = strlen(rel); + if (root_len > SIZE_MAX - rel_len - PAIR_LEN) { + return NULL; + } + size_t len = root_len + SKIP_ONE + rel_len; + char *path = malloc(len + SKIP_ONE); + if (!path) { + return NULL; + } + snprintf(path, len + SKIP_ONE, "%s/%s", root, rel); + return path; +} + +static char *format_prefixed_qn(const char *prefix, const char *value) { + if (!prefix || !value) { + return NULL; + } + int needed = snprintf(NULL, 0, "%s%s", prefix, value); + if (needed < 0) { + return NULL; + } + char *qn = malloc((size_t)needed + SKIP_ONE); + if (!qn) { + return NULL; + } + snprintf(qn, (size_t)needed + SKIP_ONE, "%s%s", prefix, value); + return qn; +} + +bool cbm_cross_repo_maven_grow_array(void **items, int *cap, size_t elem_size, + void *(*realloc_fn)(void *, size_t)) { + if (!items || !*items || !cap || *cap <= 0 || elem_size == 0 || !realloc_fn) { + return false; + } + if (*cap > INT32_MAX / PAIR_LEN) { + return false; + } + int new_cap = *cap * PAIR_LEN; + if ((size_t)new_cap > SIZE_MAX / elem_size) { + return false; + } + void *tmp = realloc_fn(*items, (size_t)new_cap * elem_size); + if (!tmp) { + return false; + } + *items = tmp; + *cap = new_cap; + return true; +} + +static void free_artifacts(mcr_artifact_t *items, int count) { + if (!items) { + return; + } + for (int i = 0; i < count; i++) { + free(items[i].group_id); + free(items[i].artifact_id); + free(items[i].pom_path); + } + free(items); +} + +static void free_dependency_fields(mcr_dependency_t *dep) { + if (!dep) { + return; + } + free(dep->group_id); + free(dep->artifact_id); + free(dep->version); + free(dep->scope); + free(dep->pom_path); + memset(dep, 0, sizeof(*dep)); +} + +static void free_dependencies(mcr_dependency_t *items, 
int count) { + if (!items) { + return; + } + for (int i = 0; i < count; i++) { + free_dependency_fields(&items[i]); + } + free(items); +} + +static bool text_has_suffix(const char *s, const char *suffix) { + if (!s || !suffix) { + return false; + } + size_t sl = strlen(s); + size_t tl = strlen(suffix); + return sl >= tl && strcmp(s + sl - tl, suffix) == 0; +} + +static char *trim_dup(const char *start, size_t len) { + if (!start) { + return NULL; + } + while (len > 0 && (*start == ' ' || *start == '\n' || *start == '\r' || *start == '\t')) { + start++; + len--; + } + while (len > 0) { + char c = start[len - SKIP_ONE]; + if (c != ' ' && c != '\n' && c != '\r' && c != '\t') { + break; + } + len--; + } + char *out = malloc(len + SKIP_ONE); + if (!out) { + return NULL; + } + memcpy(out, start, len); + out[len] = '\0'; + return out; +} + +static bool ptr_in_span(const char *p, const char *start, const char *end) { + return p && start && end && p >= start && p < end; +} + +static bool ptr_in_xml_comment(const char *xml, const char *p) { + const char *scan = xml; + while ((scan = strstr(scan, ""); + if (!close) { + return p >= scan; + } + const char *comment_end = close + strlen("-->"); + if (ptr_in_span(p, scan, comment_end)) { + return true; + } + scan = comment_end; + } + return false; +} + +static bool ptr_in_xml_block_named(const char *xml, const char *tag, const char *p) { + char open_pat[CBM_SZ_64]; + char close_pat[CBM_SZ_64]; + snprintf(open_pat, sizeof(open_pat), "<%s", tag); + snprintf(close_pat, sizeof(close_pat), "", tag); + + const char *scan = xml; + while ((scan = strstr(scan, open_pat)) != NULL) { + const char *open_end = strchr(scan, '>'); + if (!open_end) { + return false; + } + const char *close = strstr(open_end + SKIP_ONE, close_pat); + if (!close) { + return false; + } + const char *block_end = close + strlen(close_pat); + if (ptr_in_span(p, scan, block_end)) { + return true; + } + scan = block_end; + } + return false; +} + +static bool 
ptr_in_non_usage_dependency_block(const char *xml, const char *p) { + return ptr_in_xml_block_named(xml, "dependencyManagement", p) || + ptr_in_xml_block_named(xml, "plugin", p); +} + +static bool find_xml_block(const char *xml, const char *tag, const char **block_start, + const char **block_end) { + char open_pat[CBM_SZ_64]; + char close_pat[CBM_SZ_64]; + snprintf(open_pat, sizeof(open_pat), "<%s", tag); + snprintf(close_pat, sizeof(close_pat), "", tag); + const char *open = strstr(xml, open_pat); + if (!open) { + return false; + } + const char *open_end = strchr(open, '>'); + if (!open_end) { + return false; + } + const char *close = strstr(open_end + SKIP_ONE, close_pat); + if (!close) { + return false; + } + *block_start = open; + *block_end = close + strlen(close_pat); + return true; +} + +static char *xml_tag_text_dup(const char *start, const char *end, const char *tag) { + char open_pat[CBM_SZ_64]; + char close_pat[CBM_SZ_64]; + snprintf(open_pat, sizeof(open_pat), "<%s", tag); + snprintf(close_pat, sizeof(close_pat), "", tag); + + const char *p = start; + while ((p = strstr(p, open_pat)) != NULL && (!end || p < end)) { + const char *open_end = strchr(p, '>'); + if (!open_end || (end && open_end >= end)) { + return NULL; + } + const char *close = strstr(open_end + SKIP_ONE, close_pat); + if (!close || (end && close > end)) { + return NULL; + } + char *text = trim_dup(open_end + SKIP_ONE, (size_t)(close - open_end - SKIP_ONE)); + if (text && text[0]) { + return text; + } + free(text); + return NULL; + } + return NULL; +} + +static char *xml_tag_text_outside_project_blocks_dup(const char *xml, const char *tag) { + const char *parent_start = NULL; + const char *parent_end = NULL; + const char *deps_start = NULL; + const char *deps_end = NULL; + find_xml_block(xml, "parent", &parent_start, &parent_end); + find_xml_block(xml, "dependencies", &deps_start, &deps_end); + + char open_pat[CBM_SZ_64]; + char close_pat[CBM_SZ_64]; + snprintf(open_pat, sizeof(open_pat), 
"<%s", tag); + snprintf(close_pat, sizeof(close_pat), "", tag); + + const char *p = xml; + while ((p = strstr(p, open_pat)) != NULL) { + if (ptr_in_span(p, parent_start, parent_end) || ptr_in_span(p, deps_start, deps_end)) { + p += strlen(open_pat); + continue; + } + const char *open_end = strchr(p, '>'); + if (!open_end) { + return NULL; + } + const char *close = strstr(open_end + SKIP_ONE, close_pat); + if (!close) { + return NULL; + } + char *text = trim_dup(open_end + SKIP_ONE, (size_t)(close - open_end - SKIP_ONE)); + if (text && text[0]) { + return text; + } + free(text); + return NULL; + } + return NULL; +} + +static char *read_text_file_cap(const char *path) { + FILE *f = fopen(path, "rb"); + if (!f) { + return NULL; + } + if (fseek(f, 0, SEEK_END) != 0) { + fclose(f); + return NULL; + } + long sz = ftell(f); + if (sz < 0 || sz > MCR_MAX_POM_SIZE) { + fclose(f); + return NULL; + } + rewind(f); + char *buf = malloc((size_t)sz + SKIP_ONE); + if (!buf) { + fclose(f); + return NULL; + } + size_t n = fread(buf, SKIP_ONE, (size_t)sz, f); + fclose(f); + buf[n] = '\0'; + return buf; +} + +static char *project_root_dup(cbm_store_t *store, const char *project) { + cbm_project_t p = {0}; + if (cbm_store_get_project(store, project, &p) != CBM_STORE_OK || !p.root_path) { + return NULL; + } + char *root = malloc(strlen(p.root_path) + SKIP_ONE); + if (root) { + strcpy(root, p.root_path); + } + free((void *)p.name); + free((void *)p.indexed_at); + free((void *)p.root_path); + return root; +} + +static int list_pom_paths(cbm_store_t *store, const char *project, char ***out) { + *out = NULL; + struct sqlite3 *db = cbm_store_get_db(store); + if (!db) { + return 0; + } + sqlite3_stmt *st = NULL; + if (sqlite3_prepare_v2(db, + "SELECT DISTINCT file_path FROM nodes WHERE project=?1 AND " + "label='File' AND (name='pom.xml' OR file_path LIKE '%/pom.xml')", + CBM_NOT_FOUND, &st, NULL) != SQLITE_OK) { + return 0; + } + sqlite3_bind_text(st, SKIP_ONE, project, CBM_NOT_FOUND, 
SQLITE_STATIC); + + int cap = MCR_INIT_CAP; + int count = 0; + char **paths = malloc((size_t)cap * sizeof(char *)); + if (!paths) { + sqlite3_finalize(st); + return 0; + } + while (sqlite3_step(st) == SQLITE_ROW) { + const char *path = (const char *)sqlite3_column_text(st, 0); + if (!path || !text_has_suffix(path, "pom.xml")) { + continue; + } + if (count >= cap) { + void *grown = paths; + if (!cbm_cross_repo_maven_grow_array(&grown, &cap, sizeof(*paths), realloc)) { + break; + } + paths = grown; + } + paths[count] = malloc(strlen(path) + SKIP_ONE); + if (paths[count]) { + strcpy(paths[count], path); + count++; + } + } + sqlite3_finalize(st); + *out = paths; + return count; +} + +static void free_string_list(char **items, int count) { + for (int i = 0; i < count; i++) { + free(items[i]); + } + free(items); +} + +static int collect_artifacts(cbm_store_t *store, const char *project, mcr_artifact_t **out) { + *out = NULL; + char *root = project_root_dup(store, project); + if (!root) { + return 0; + } + + char **paths = NULL; + int path_count = list_pom_paths(store, project, &paths); + int cap = MCR_INIT_CAP; + int count = 0; + mcr_artifact_t *items = calloc((size_t)cap, sizeof(mcr_artifact_t)); + if (!items) { + free_string_list(paths, path_count); + free(root); + return 0; + } + + for (int i = 0; i < path_count; i++) { + char *full = join_root_path(root, paths[i]); + if (!full) { + continue; + } + char *xml = read_text_file_cap(full); + free(full); + if (!xml) { + continue; + } + + const char *parent_start = NULL; + const char *parent_end = NULL; + char *parent_group = NULL; + if (find_xml_block(xml, "parent", &parent_start, &parent_end)) { + parent_group = xml_tag_text_dup(parent_start, parent_end, "groupId"); + } + + char *group = xml_tag_text_outside_project_blocks_dup(xml, "groupId"); + char *artifact = xml_tag_text_outside_project_blocks_dup(xml, "artifactId"); + if (!group && parent_group) { + group = dup_cstr(parent_group); + } + if (group && group[0] && 
artifact && artifact[0]) { + if (count >= cap) { + void *grown = items; + if (!cbm_cross_repo_maven_grow_array(&grown, &cap, sizeof(*items), realloc)) { + free(parent_group); + free(group); + free(artifact); + free(xml); + break; + } + items = grown; + } + memset(&items[count], 0, sizeof(items[count])); + items[count].group_id = group; + items[count].artifact_id = artifact; + items[count].pom_path = dup_cstr(paths[i]); + if (!items[count].pom_path) { + free(items[count].group_id); + free(items[count].artifact_id); + free(parent_group); + memset(&items[count], 0, sizeof(items[count])); + free(xml); + break; + } + group = NULL; + artifact = NULL; + count++; + } + free(parent_group); + free(group); + free(artifact); + free(xml); + } + + free_string_list(paths, path_count); + free(root); + *out = items; + return count; +} + +static bool add_dependency(mcr_dependency_t **items, int *count, int *cap, const char *group, + const char *artifact, const char *version, const char *scope, + const char *pom_path) { + bool missing_storage = !items || !*items || !count || !cap; + bool missing_coordinate = !group || !artifact || !group[0] || !artifact[0]; + if (missing_storage || missing_coordinate) { + return false; + } + if (*count >= *cap) { + void *grown = *items; + if (!cbm_cross_repo_maven_grow_array(&grown, cap, sizeof(**items), realloc)) { + return false; + } + *items = grown; + } + mcr_dependency_t *dep = &(*items)[*count]; + memset(dep, 0, sizeof(*dep)); + dep->group_id = dup_cstr(group); + dep->artifact_id = dup_cstr(artifact); + dep->version = dup_cstr(version); + dep->scope = dup_cstr(scope); + dep->pom_path = dup_cstr(pom_path); + if (!dep->group_id || !dep->artifact_id || !dep->version || !dep->scope || !dep->pom_path) { + free_dependency_fields(dep); + return false; + } + (*count)++; + return true; +} + +static int collect_dependencies(cbm_store_t *store, const char *project, mcr_dependency_t **out) { + *out = NULL; + char *root = project_root_dup(store, project); + 
if (!root) { + return 0; + } + + char **paths = NULL; + int path_count = list_pom_paths(store, project, &paths); + int cap = MCR_INIT_CAP; + int count = 0; + mcr_dependency_t *items = calloc((size_t)cap, sizeof(mcr_dependency_t)); + if (!items) { + free_string_list(paths, path_count); + free(root); + return 0; + } + + for (int i = 0; i < path_count; i++) { + char *full = join_root_path(root, paths[i]); + if (!full) { + continue; + } + char *xml = read_text_file_cap(full); + free(full); + if (!xml) { + continue; + } + + const char *p = xml; + while ((p = strstr(p, "'); + if (!open_end) { + break; + } + const char *end = strstr(open_end + SKIP_ONE, ""); + if (!end) { + break; + } + + char *group = xml_tag_text_dup(open_end + SKIP_ONE, end, "groupId"); + char *artifact = xml_tag_text_dup(open_end + SKIP_ONE, end, "artifactId"); + char *version = xml_tag_text_dup(open_end + SKIP_ONE, end, "version"); + char *scope = xml_tag_text_dup(open_end + SKIP_ONE, end, "scope"); + + add_dependency(&items, &count, &cap, group, artifact, version, scope, paths[i]); + + free(group); + free(artifact); + free(version); + free(scope); + p = end + strlen(""); + } + free(xml); + } + + free_string_list(paths, path_count); + free(root); + *out = items; + return count; +} + +static int64_t project_node_id(cbm_store_t *store, const char *project) { + struct sqlite3 *db = cbm_store_get_db(store); + if (!db) { + return 0; + } + sqlite3_stmt *st = NULL; + if (sqlite3_prepare_v2(db, "SELECT id FROM nodes WHERE project=?1 AND label='Project' LIMIT 1", + CBM_NOT_FOUND, &st, NULL) != SQLITE_OK) { + return 0; + } + sqlite3_bind_text(st, SKIP_ONE, project, CBM_NOT_FOUND, SQLITE_STATIC); + int64_t id = 0; + if (sqlite3_step(st) == SQLITE_ROW) { + id = sqlite3_column_int64(st, 0); + } + sqlite3_finalize(st); + if (id != 0) { + return id; + } + + char *qn = format_prefixed_qn("__project__", project); + if (!qn) { + return 0; + } + cbm_node_t node = { + .project = project, + .label = "Project", + .name = 
project, + .qualified_name = qn, + .file_path = "", + .start_line = 0, + .end_line = 0, + .properties_json = "{}", + }; + id = cbm_store_upsert_node(store, &node); + free(qn); + return id; +} + +static void insert_cross_edge(cbm_store_t *store, const char *project, int64_t from_id, + int64_t to_id, const char *edge_type, const char *props) { + cbm_edge_t edge = { + .project = project, + .source_id = from_id, + .target_id = to_id, + .type = edge_type, + .properties_json = props, + }; + cbm_store_insert_edge(store, &edge); +} + +static void cleanup_reverse_library_edges(cbm_store_t *store, const char *project, + const char *source_project) { + if (!store || !project || !source_project) { + return; + } + struct sqlite3 *db = cbm_store_get_db(store); + if (!db) { + return; + } + sqlite3_stmt *st = NULL; + if (sqlite3_prepare_v2(db, + "DELETE FROM edges WHERE project=?1 AND type='CROSS_LIBRARY_USED_BY' " + "AND target_id IN (SELECT id FROM nodes WHERE project=?1 AND " + "label='LibraryConsumer' AND name=?2)", + CBM_NOT_FOUND, &st, NULL) == SQLITE_OK) { + sqlite3_bind_text(st, SKIP_ONE, project, CBM_NOT_FOUND, SQLITE_STATIC); + sqlite3_bind_text(st, PAIR_LEN, source_project, CBM_NOT_FOUND, SQLITE_STATIC); + sqlite3_step(st); + sqlite3_finalize(st); + } + st = NULL; + if (sqlite3_prepare_v2(db, + "DELETE FROM nodes WHERE project=?1 AND label='LibraryConsumer' " + "AND name=?2 AND qualified_name GLOB '__library_consumer__*'", + CBM_NOT_FOUND, &st, NULL) == SQLITE_OK) { + sqlite3_bind_text(st, SKIP_ONE, project, CBM_NOT_FOUND, SQLITE_STATIC); + sqlite3_bind_text(st, PAIR_LEN, source_project, CBM_NOT_FOUND, SQLITE_STATIC); + sqlite3_step(st); + sqlite3_finalize(st); + } +} + +static char *build_library_props(const char *other_project, const mcr_dependency_t *dep) { + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + if (!doc) { + return dup_cstr("{}"); + } + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + yyjson_mut_obj_add_strcpy(doc, 
root, "target_project", other_project ? other_project : ""); + yyjson_mut_obj_add_strcpy(doc, root, "group_id", dep->group_id); + yyjson_mut_obj_add_strcpy(doc, root, "artifact_id", dep->artifact_id); + yyjson_mut_obj_add_strcpy(doc, root, "version", dep->version); + yyjson_mut_obj_add_strcpy(doc, root, "scope", dep->scope); + yyjson_mut_obj_add_strcpy(doc, root, "source_pom", dep->pom_path ? dep->pom_path : ""); + + size_t len = 0; + char *json = yyjson_mut_write(doc, 0, &len); + if (!json) { + json = dup_cstr("{}"); + } + yyjson_mut_doc_free(doc); + return json; +} + +static char *format_library_qn(const char *prefix, const char *project, + const mcr_dependency_t *dep) { + if (!prefix || !project || !dep) { + return NULL; + } + const char *pom_path = dep->pom_path ? dep->pom_path : ""; + int needed = snprintf(NULL, 0, "%s%s__%s:%s__%s", prefix, project, dep->group_id, + dep->artifact_id, pom_path); + if (needed < 0) { + return NULL; + } + char *qn = malloc((size_t)needed + SKIP_ONE); + if (!qn) { + return NULL; + } + snprintf(qn, (size_t)needed + SKIP_ONE, "%s%s__%s:%s__%s", prefix, project, dep->group_id, + dep->artifact_id, pom_path); + return qn; +} + +static int emit_library_match(cbm_store_t *src_store, const char *src_project, + cbm_store_t *tgt_store, const char *tgt_project, + const mcr_dependency_t *dep) { + int64_t src_project_id = project_node_id(src_store, src_project); + int64_t tgt_project_id = project_node_id(tgt_store, tgt_project); + if (src_project_id <= 0 || tgt_project_id <= 0) { + return 0; + } + + char *lib_qn = format_library_qn("__library__", tgt_project, dep); + if (!lib_qn) { + return 0; + } + char *consumer_qn = format_library_qn("__library_consumer__", src_project, dep); + if (!consumer_qn) { + free(lib_qn); + return 0; + } + + char *props = build_library_props(tgt_project, dep); + if (!props) { + free(consumer_qn); + free(lib_qn); + return 0; + } + cbm_node_t lib = { + .project = src_project, + .label = "Library", + .name = 
dep->artifact_id, + .qualified_name = lib_qn, + .file_path = dep->pom_path ? dep->pom_path : "", + .start_line = 0, + .end_line = 0, + .properties_json = props, + }; + int64_t lib_id = cbm_store_upsert_node(src_store, &lib); + if (lib_id <= 0) { + free(props); + free(consumer_qn); + free(lib_qn); + return 0; + } + + char *reverse_props = build_library_props(src_project, dep); + if (!reverse_props) { + free(props); + free(consumer_qn); + free(lib_qn); + return 0; + } + cbm_node_t consumer = { + .project = tgt_project, + .label = "LibraryConsumer", + .name = src_project, + .qualified_name = consumer_qn, + .file_path = "", + .start_line = 0, + .end_line = 0, + .properties_json = reverse_props, + }; + int64_t consumer_id = cbm_store_upsert_node(tgt_store, &consumer); + if (consumer_id <= 0) { + free(reverse_props); + free(props); + free(consumer_qn); + free(lib_qn); + return 0; + } + insert_cross_edge(src_store, src_project, src_project_id, lib_id, "CROSS_LIBRARY_DEPENDS_ON", + props); + insert_cross_edge(tgt_store, tgt_project, tgt_project_id, consumer_id, "CROSS_LIBRARY_USED_BY", + reverse_props); + free(reverse_props); + free(props); + free(consumer_qn); + free(lib_qn); + return 1; +} + +static bool artifact_matches_dep(const mcr_artifact_t *artifact, const mcr_dependency_t *dep) { + return strcmp(artifact->group_id, dep->group_id) == 0 && + strcmp(artifact->artifact_id, dep->artifact_id) == 0; +} + +int cbm_cross_repo_match_maven_libraries(cbm_store_t *src_store, const char *src_project, + cbm_store_t *tgt_store, const char *tgt_project) { + mcr_dependency_t *deps = NULL; + mcr_artifact_t *artifacts = NULL; + cleanup_reverse_library_edges(tgt_store, tgt_project, src_project); + int dep_count = collect_dependencies(src_store, src_project, &deps); + int artifact_count = collect_artifacts(tgt_store, tgt_project, &artifacts); + if (dep_count == 0 || artifact_count == 0) { + free_dependencies(deps, dep_count); + free_artifacts(artifacts, artifact_count); + return 0; + } 
+ + int count = 0; + for (int d = 0; d < dep_count && count < MCR_MAX_EDGES; d++) { + for (int a = 0; a < artifact_count; a++) { + if (!artifact_matches_dep(&artifacts[a], &deps[d])) { + continue; + } + count += emit_library_match(src_store, src_project, tgt_store, tgt_project, &deps[d]); + break; + } + } + + free_dependencies(deps, dep_count); + free_artifacts(artifacts, artifact_count); + return count; +} diff --git a/src/pipeline/pass_cross_repo_maven.h b/src/pipeline/pass_cross_repo_maven.h new file mode 100644 index 00000000..29d7546b --- /dev/null +++ b/src/pipeline/pass_cross_repo_maven.h @@ -0,0 +1,12 @@ +/* + * pass_cross_repo_maven.h - Maven artifact matching for cross-repo links. + */ +#ifndef CBM_PASS_CROSS_REPO_MAVEN_H +#define CBM_PASS_CROSS_REPO_MAVEN_H + +#include "store/store.h" + +int cbm_cross_repo_match_maven_libraries(cbm_store_t *src_store, const char *src_project, + cbm_store_t *tgt_store, const char *tgt_project); + +#endif /* CBM_PASS_CROSS_REPO_MAVEN_H */ diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index 4ce32623..73f0cb87 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -10,11 +10,14 @@ #include "test_helpers.h" #include "pipeline/pipeline.h" #include "pipeline/pipeline_internal.h" +#include "pipeline/pass_cross_repo.h" #include "store/store.h" +#include #include // properties-JSON validity (oversized-props regression) #include #include +#include #include #include "foundation/compat_thread.h" #include @@ -1061,6 +1064,838 @@ static void teardown_usages_repo(void) { g_usages_tmpdir[0] = '\0'; } +static int count_edges_by_type(cbm_store_t *s, const char *project, const char *edge_type) { + sqlite3_stmt *stmt = NULL; + struct sqlite3 *db = cbm_store_get_db(s); + if (!db) { + return -1; + } + if (sqlite3_prepare_v2(db, "SELECT COUNT(*) FROM edges WHERE project=?1 AND type=?2", -1, &stmt, + NULL) != SQLITE_OK) { + return -1; + } + sqlite3_bind_text(stmt, 1, project, -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 
2, edge_type, -1, SQLITE_STATIC); + int count = -1; + if (sqlite3_step(stmt) == SQLITE_ROW) { + count = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + return count; +} + +static int edge_props_are_valid_json(cbm_store_t *s, const char *project, const char *edge_type) { + sqlite3_stmt *stmt = NULL; + struct sqlite3 *db = cbm_store_get_db(s); + if (!db) { + return 0; + } + if (sqlite3_prepare_v2(db, "SELECT properties FROM edges WHERE project=?1 AND type=?2", -1, + &stmt, NULL) != SQLITE_OK) { + return 0; + } + sqlite3_bind_text(stmt, 1, project, -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 2, edge_type, -1, SQLITE_STATIC); + + int seen = 0; + int ok = 1; + while (sqlite3_step(stmt) == SQLITE_ROW) { + seen++; + const char *props = (const char *)sqlite3_column_text(stmt, 0); + yyjson_doc *doc = props ? yyjson_read(props, strlen(props), 0) : NULL; + if (!doc) { + ok = 0; + break; + } + yyjson_doc_free(doc); + } + sqlite3_finalize(stmt); + return seen > 0 && ok; +} + +static int node_exists_by_qn(cbm_store_t *s, const char *project, const char *qn) { + sqlite3_stmt *stmt = NULL; + struct sqlite3 *db = cbm_store_get_db(s); + if (!db) { + return 0; + } + if (sqlite3_prepare_v2(db, "SELECT COUNT(*) FROM nodes WHERE project=?1 AND qualified_name=?2", + -1, &stmt, NULL) != SQLITE_OK) { + return 0; + } + sqlite3_bind_text(stmt, 1, project, -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 2, qn, -1, SQLITE_STATIC); + int count = 0; + if (sqlite3_step(stmt) == SQLITE_ROW) { + count = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + return count > 0; +} + +static int count_nodes_by_label(cbm_store_t *s, const char *project, const char *label) { + sqlite3_stmt *stmt = NULL; + struct sqlite3 *db = cbm_store_get_db(s); + if (!db) { + return -1; + } + if (sqlite3_prepare_v2(db, "SELECT COUNT(*) FROM nodes WHERE project=?1 AND label=?2", -1, + &stmt, NULL) != SQLITE_OK) { + return -1; + } + sqlite3_bind_text(stmt, 1, project, -1, SQLITE_STATIC); + 
sqlite3_bind_text(stmt, 2, label, -1, SQLITE_STATIC); + int count = -1; + if (sqlite3_step(stmt) == SQLITE_ROW) { + count = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + return count; +} + +typedef struct { + char cache[256]; + char provider_root[256]; + char consumer_root[256]; + char provider_db[512]; + char consumer_db[512]; + char previous_cache[512]; + int had_previous_cache; +} cross_maven_fixture_t; + +static void restore_cross_maven_cache_env(cross_maven_fixture_t *fx) { + if (fx->had_previous_cache) { + cbm_setenv("CBM_CACHE_DIR", fx->previous_cache, 1); + } else { + cbm_unsetenv("CBM_CACHE_DIR"); + } +} + +static int setup_cross_maven_fixture(cross_maven_fixture_t *fx, const char *provider_pom, + const char *consumer_pom) { + memset(fx, 0, sizeof(*fx)); + snprintf(fx->cache, sizeof(fx->cache), "/tmp/cbm_cross_maven_cache_XXXXXX"); + if (!cbm_mkdtemp(fx->cache)) { + return -1; + } + snprintf(fx->provider_root, sizeof(fx->provider_root), "/tmp/cbm_provider_maven_XXXXXX"); + if (!cbm_mkdtemp(fx->provider_root)) { + th_cleanup(fx->cache); + return -1; + } + snprintf(fx->consumer_root, sizeof(fx->consumer_root), "/tmp/cbm_consumer_maven_XXXXXX"); + if (!cbm_mkdtemp(fx->consumer_root)) { + th_cleanup(fx->provider_root); + th_cleanup(fx->cache); + return -1; + } + + if (th_write_file(TH_PATH(fx->provider_root, "pom.xml"), provider_pom) != 0 || + th_write_file(TH_PATH(fx->consumer_root, "pom.xml"), consumer_pom) != 0) { + th_cleanup(fx->consumer_root); + th_cleanup(fx->provider_root); + th_cleanup(fx->cache); + return -1; + } + + const char *previous_cache = getenv("CBM_CACHE_DIR"); + if (previous_cache) { + fx->had_previous_cache = 1; + snprintf(fx->previous_cache, sizeof(fx->previous_cache), "%s", previous_cache); + } + cbm_setenv("CBM_CACHE_DIR", fx->cache, 1); + snprintf(fx->provider_db, sizeof(fx->provider_db), "%s/provider.db", fx->cache); + snprintf(fx->consumer_db, sizeof(fx->consumer_db), "%s/consumer.db", fx->cache); + + cbm_store_t 
*provider = cbm_store_open_path(fx->provider_db); + cbm_store_t *consumer = cbm_store_open_path(fx->consumer_db); + if (!provider || !consumer) { + if (provider) { + cbm_store_close(provider); + } + if (consumer) { + cbm_store_close(consumer); + } + th_cleanup(fx->consumer_root); + th_cleanup(fx->provider_root); + th_cleanup(fx->cache); + restore_cross_maven_cache_env(fx); + return -1; + } + int ok = cbm_store_upsert_project(provider, "provider", fx->provider_root) == CBM_STORE_OK && + cbm_store_upsert_project(consumer, "consumer", fx->consumer_root) == CBM_STORE_OK; + + cbm_node_t provider_project = {.project = "provider", + .label = "Project", + .name = "provider", + .qualified_name = "__project__provider", + .file_path = "", + .start_line = 0, + .end_line = 0, + .properties_json = "{}"}; + cbm_node_t consumer_project = {.project = "consumer", + .label = "Project", + .name = "consumer", + .qualified_name = "__project__consumer", + .file_path = "", + .start_line = 0, + .end_line = 0, + .properties_json = "{}"}; + cbm_node_t provider_pom_node = {.project = "provider", + .label = "File", + .name = "pom.xml", + .qualified_name = "provider.pom", + .file_path = "pom.xml", + .start_line = 1, + .end_line = 1, + .properties_json = "{}"}; + cbm_node_t consumer_pom_node = {.project = "consumer", + .label = "File", + .name = "pom.xml", + .qualified_name = "consumer.pom", + .file_path = "pom.xml", + .start_line = 1, + .end_line = 1, + .properties_json = "{}"}; + ok = ok && cbm_store_upsert_node(provider, &provider_project) > 0 && + cbm_store_upsert_node(consumer, &consumer_project) > 0 && + cbm_store_upsert_node(provider, &provider_pom_node) > 0 && + cbm_store_upsert_node(consumer, &consumer_pom_node) > 0; + + cbm_store_close(provider); + cbm_store_close(consumer); + if (!ok) { + th_cleanup(fx->consumer_root); + th_cleanup(fx->provider_root); + th_cleanup(fx->cache); + restore_cross_maven_cache_env(fx); + return -1; + } + return 0; +} + +static void 
/* Number of times test_maven_realloc_records_call() has been invoked. */
static int g_test_maven_realloc_calls;

/* realloc-shaped stub: ignores both arguments and always returns NULL. */
static void *test_maven_realloc_fails(void *ptr, size_t size) {
    (void)size;
    (void)ptr;
    return NULL;
}

/*
 * realloc-shaped stub: bumps g_test_maven_realloc_calls and hands `ptr`
 * back unchanged; `size` is ignored.
 */
static void *test_maven_realloc_records_call(void *ptr, size_t size) {
    g_test_maven_realloc_calls += 1;
    (void)size;
    return ptr;
}

/* malloc-shaped stub: ignores `size` and always returns NULL. */
static void *test_project_list_malloc_fails(size_t size) {
    (void)size;
    return NULL;
}
== original, 1); + + free(items); + PASS(); +} + +TEST(cross_repo_maven_dependency_creates_library_edges) { + const char *provider_pom = "4.0.0" + "com.example.platform" + "shared-library" + "1.0.0"; + const char *consumer_pom = "4.0.0" + "appconsumer" + "com.example.platform" + "shared-library1.0.0" + "vendor.client" + "vendor-client2.0.0" + "com.example.platform" + "shared-library" + ""; + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + const char *targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 1); + + cbm_store_t *consumer = cbm_store_open_path(fx.consumer_db); + cbm_store_t *provider = cbm_store_open_path(fx.provider_db); + ASSERT_NOT_NULL(consumer); + ASSERT_NOT_NULL(provider); + ASSERT_EQ(count_edges_by_type(consumer, "consumer", "CROSS_LIBRARY_DEPENDS_ON"), 1); + ASSERT_EQ(count_edges_by_type(provider, "provider", "CROSS_LIBRARY_USED_BY"), 1); + cbm_store_close(consumer); + cbm_store_close(provider); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} + +TEST(cross_repo_maven_dependency_escapes_library_edge_props) { + const char *provider_pom = "4.0.0" + "com.example\"platform" + "shared-library" + "1.0.0"; + const char *consumer_pom = "4.0.0" + "appconsumer" + "com.example\"platform" + "shared-library1.0.0" + ""; + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + const char *targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 1); + + cbm_store_t *consumer = cbm_store_open_path(fx.consumer_db); + cbm_store_t *provider = cbm_store_open_path(fx.provider_db); + ASSERT_NOT_NULL(consumer); + ASSERT_NOT_NULL(provider); + ASSERT_TRUE(edge_props_are_valid_json(consumer, "consumer", "CROSS_LIBRARY_DEPENDS_ON")); + ASSERT_TRUE(edge_props_are_valid_json(provider, "provider", 
"CROSS_LIBRARY_USED_BY")); + cbm_store_close(consumer); + cbm_store_close(provider); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} + +TEST(cross_repo_maven_dependency_management_does_not_create_library_edge) { + const char *provider_pom = "4.0.0" + "com.example.platform" + "shared-library" + "1.0.0"; + const char *consumer_pom = + "4.0.0" + "appconsumer" + "" + "com.example.platformshared-library" + "1.0.0" + ""; + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + const char *targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 0); + + cbm_store_t *consumer = cbm_store_open_path(fx.consumer_db); + cbm_store_t *provider = cbm_store_open_path(fx.provider_db); + ASSERT_NOT_NULL(consumer); + ASSERT_NOT_NULL(provider); + ASSERT_EQ(count_edges_by_type(consumer, "consumer", "CROSS_LIBRARY_DEPENDS_ON"), 0); + ASSERT_EQ(count_edges_by_type(provider, "provider", "CROSS_LIBRARY_USED_BY"), 0); + cbm_store_close(consumer); + cbm_store_close(provider); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} + +TEST(cross_repo_maven_commented_dependency_does_not_create_library_edge) { + const char *provider_pom = "4.0.0" + "com.example.platform" + "shared-library" + "1.0.0"; + const char *consumer_pom = + "4.0.0" + "appconsumer" + ""; + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + const char *targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 0); + + cbm_store_t *consumer = cbm_store_open_path(fx.consumer_db); + cbm_store_t *provider = cbm_store_open_path(fx.provider_db); + ASSERT_NOT_NULL(consumer); + ASSERT_NOT_NULL(provider); + ASSERT_EQ(count_edges_by_type(consumer, "consumer", "CROSS_LIBRARY_DEPENDS_ON"), 0); + ASSERT_EQ(count_edges_by_type(provider, "provider", 
"CROSS_LIBRARY_USED_BY"), 0); + cbm_store_close(consumer); + cbm_store_close(provider); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} + +TEST(cross_repo_maven_plugin_dependency_does_not_create_library_edge) { + const char *provider_pom = "4.0.0" + "com.example.platform" + "shared-library" + "1.0.0"; + const char *consumer_pom = "4.0.0" + "appconsumer" + "org.apache.maven.plugins" + "maven-plugin1.0.0" + "com.example.platform" + "shared-library1.0.0" + ""; + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + const char *targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 0); + + cbm_store_t *consumer = cbm_store_open_path(fx.consumer_db); + cbm_store_t *provider = cbm_store_open_path(fx.provider_db); + ASSERT_NOT_NULL(consumer); + ASSERT_NOT_NULL(provider); + ASSERT_EQ(count_edges_by_type(consumer, "consumer", "CROSS_LIBRARY_DEPENDS_ON"), 0); + ASSERT_EQ(count_edges_by_type(provider, "provider", "CROSS_LIBRARY_USED_BY"), 0); + cbm_store_close(consumer); + cbm_store_close(provider); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} + +TEST(cross_repo_maven_cleanup_preserves_unrelated_nodes) { + const char *provider_pom = "4.0.0" + "com.example.platform" + "shared-library" + "1.0.0"; + const char *consumer_pom = "4.0.0" + "appconsumer" + "com.example.platform" + "shared-library1.0.0" + ""; + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + cbm_store_t *consumer = cbm_store_open_path(fx.consumer_db); + ASSERT_NOT_NULL(consumer); + cbm_node_t unrelated = {.project = "consumer", + .label = "Function", + .name = "Unrelated", + .qualified_name = "xxlibraryzz_should_stay", + .file_path = "src/main.c", + .start_line = 1, + .end_line = 1, + .properties_json = "{}"}; + cbm_node_t prefixed_function = {.project = "consumer", + .label = "Function", + .name = 
"PrefixedFunction", + .qualified_name = "__library__manual_function_should_stay", + .file_path = "src/manual.c", + .start_line = 1, + .end_line = 1, + .properties_json = "{}"}; + ASSERT_GT(cbm_store_upsert_node(consumer, &unrelated), 0); + ASSERT_GT(cbm_store_upsert_node(consumer, &prefixed_function), 0); + cbm_store_close(consumer); + + const char *targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 1); + + consumer = cbm_store_open_path(fx.consumer_db); + ASSERT_NOT_NULL(consumer); + ASSERT_TRUE(node_exists_by_qn(consumer, "consumer", "xxlibraryzz_should_stay")); + ASSERT_TRUE(node_exists_by_qn(consumer, "consumer", "__library__manual_function_should_stay")); + cbm_store_close(consumer); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} + +TEST(cross_repo_maven_provider_rerun_preserves_incoming_used_by) { + const char *provider_pom = "4.0.0" + "com.example.platform" + "shared-library" + "1.0.0"; + const char *consumer_pom = "4.0.0" + "appconsumer" + "com.example.platform" + "shared-library1.0.0" + ""; + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + const char *provider_targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", provider_targets, 1); + ASSERT_EQ(result.library_edges, 1); + + cbm_store_t *provider = cbm_store_open_path(fx.provider_db); + ASSERT_NOT_NULL(provider); + ASSERT_EQ(count_edges_by_type(provider, "provider", "CROSS_LIBRARY_USED_BY"), 1); + cbm_store_close(provider); + + const char *consumer_targets[] = {"consumer"}; + result = cbm_cross_repo_match("provider", consumer_targets, 1); + ASSERT_EQ(result.library_edges, 0); + + provider = cbm_store_open_path(fx.provider_db); + ASSERT_NOT_NULL(provider); + ASSERT_EQ(count_edges_by_type(provider, "provider", "CROSS_LIBRARY_USED_BY"), 1); + cbm_store_close(provider); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} 
+ +TEST(cross_repo_maven_removed_dependency_clears_provider_used_by) { + const char *provider_pom = "4.0.0" + "com.example.platform" + "shared-library" + "1.0.0"; + const char *consumer_pom = "4.0.0" + "appconsumer" + "com.example.platform" + "shared-library1.0.0" + ""; + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + const char *targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 1); + + ASSERT_EQ(th_write_file(TH_PATH(fx.consumer_root, "pom.xml"), + "4.0.0" + "appconsumer" + ""), + 0); + result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 0); + + cbm_store_t *consumer = cbm_store_open_path(fx.consumer_db); + cbm_store_t *provider = cbm_store_open_path(fx.provider_db); + ASSERT_NOT_NULL(consumer); + ASSERT_NOT_NULL(provider); + ASSERT_EQ(count_edges_by_type(consumer, "consumer", "CROSS_LIBRARY_DEPENDS_ON"), 0); + ASSERT_EQ(count_edges_by_type(provider, "provider", "CROSS_LIBRARY_USED_BY"), 0); + cbm_store_close(consumer); + cbm_store_close(provider); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} + +TEST(cross_repo_maven_long_coordinates_do_not_collide) { + char group_a[180]; + char group_b[180]; + memset(group_a, 'g', 150); + memset(group_b, 'g', 150); + group_a[150] = 'a'; + group_b[150] = 'b'; + group_a[151] = '\0'; + group_b[151] = '\0'; + + const char *provider_pom = "4.0.0" + "providerroot" + "1.0.0"; + char consumer_pom[2048]; + snprintf(consumer_pom, sizeof(consumer_pom), + "4.0.0" + "appconsumer" + "%s" + "shared-library1.0.0" + "%sshared-library" + "1.0.0", + group_a, group_b); + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + char provider_a[512]; + char provider_b[512]; + snprintf(provider_a, sizeof(provider_a), + "4.0.0%s" + "shared-library1.0.0", + group_a); + snprintf(provider_b, sizeof(provider_b), + 
"4.0.0%s" + "shared-library1.0.0", + group_b); + const char *provider_path_a = "modules/a/pom.xml"; + const char *provider_path_b = "modules/b/pom.xml"; + ASSERT_EQ(th_write_file(TH_PATH(fx.provider_root, provider_path_a), provider_a), 0); + ASSERT_EQ(th_write_file(TH_PATH(fx.provider_root, provider_path_b), provider_b), 0); + + cbm_store_t *provider = cbm_store_open_path(fx.provider_db); + ASSERT_NOT_NULL(provider); + cbm_node_t provider_pom_a = {.project = "provider", + .label = "File", + .name = "pom.xml", + .qualified_name = "provider.long.a.pom", + .file_path = provider_path_a, + .start_line = 1, + .end_line = 1, + .properties_json = "{}"}; + cbm_node_t provider_pom_b = {.project = "provider", + .label = "File", + .name = "pom.xml", + .qualified_name = "provider.long.b.pom", + .file_path = provider_path_b, + .start_line = 1, + .end_line = 1, + .properties_json = "{}"}; + ASSERT_GT(cbm_store_upsert_node(provider, &provider_pom_a), 0); + ASSERT_GT(cbm_store_upsert_node(provider, &provider_pom_b), 0); + cbm_store_close(provider); + + const char *targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 2); + + cbm_store_t *consumer = cbm_store_open_path(fx.consumer_db); + ASSERT_NOT_NULL(consumer); + ASSERT_EQ(count_edges_by_type(consumer, "consumer", "CROSS_LIBRARY_DEPENDS_ON"), 2); + ASSERT_EQ(count_nodes_by_label(consumer, "consumer", "Library"), 2); + cbm_store_close(consumer); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} + +TEST(cross_repo_maven_fixture_restores_cache_dir) { + const char *provider_pom = "4.0.0" + "com.example.platform" + "shared-library" + "1.0.0"; + const char *consumer_pom = "4.0.0" + "appconsumer"; + const char *original = getenv("CBM_CACHE_DIR"); + char original_copy[512] = {0}; + if (original) { + snprintf(original_copy, sizeof(original_copy), "%s", original); + } + + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, 
consumer_pom), 0); + ASSERT_STR_EQ(getenv("CBM_CACHE_DIR"), fx.cache); + cleanup_cross_maven_fixture(&fx); + + if (original) { + ASSERT_STR_EQ(getenv("CBM_CACHE_DIR"), original_copy); + } else { + ASSERT_EQ(getenv("CBM_CACHE_DIR") == NULL, 1); + } + PASS(); +} + +TEST(cross_repo_maven_long_references_do_not_collide) { + char group[128]; + char artifact[128]; + memset(group, 'g', sizeof(group) - 1); + group[sizeof(group) - 1] = '\0'; + memset(artifact, 'a', sizeof(artifact) - 1); + artifact[sizeof(artifact) - 1] = '\0'; + + char provider_pom[512]; + snprintf(provider_pom, sizeof(provider_pom), + "4.0.0%s" + "%s1.0.0", + group, artifact); + const char *consumer_pom = "4.0.0" + "appconsumer"; + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + char common_path[240]; + memset(common_path, 0, sizeof(common_path)); + snprintf(common_path, sizeof(common_path), + "modules/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/" + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/" + "cccccccccccccccccccccccccccccccccccccccc/"); + char dep_path_a[300]; + char dep_path_b[300]; + snprintf(dep_path_a, sizeof(dep_path_a), "%sdep-a/pom.xml", common_path); + snprintf(dep_path_b, sizeof(dep_path_b), "%sdep-b/pom.xml", common_path); + + char dep_pom[512]; + snprintf(dep_pom, sizeof(dep_pom), + "4.0.0" + "appconsumer-module" + "%s%s" + "1.0.0", + group, artifact); + int write_rc = th_write_file(TH_PATH(fx.consumer_root, dep_path_a), dep_pom); + if (write_rc != 0) { + cleanup_cross_maven_fixture(&fx); + } + ASSERT_EQ(write_rc, 0); + write_rc = th_write_file(TH_PATH(fx.consumer_root, dep_path_b), dep_pom); + if (write_rc != 0) { + cleanup_cross_maven_fixture(&fx); + } + ASSERT_EQ(write_rc, 0); + + cbm_store_t *consumer = cbm_store_open_path(fx.consumer_db); + ASSERT_NOT_NULL(consumer); + cbm_node_t dep_pom_a = {.project = "consumer", + .label = "File", + .name = "pom.xml", + .qualified_name = "consumer.long.a.pom", + .file_path = dep_path_a, + 
.start_line = 1, + .end_line = 1, + .properties_json = "{}"}; + cbm_node_t dep_pom_b = {.project = "consumer", + .label = "File", + .name = "pom.xml", + .qualified_name = "consumer.long.b.pom", + .file_path = dep_path_b, + .start_line = 1, + .end_line = 1, + .properties_json = "{}"}; + ASSERT_GT(cbm_store_upsert_node(consumer, &dep_pom_a), 0); + ASSERT_GT(cbm_store_upsert_node(consumer, &dep_pom_b), 0); + cbm_store_close(consumer); + + const char *targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 2); + + consumer = cbm_store_open_path(fx.consumer_db); + ASSERT_NOT_NULL(consumer); + ASSERT_EQ(count_nodes_by_label(consumer, "consumer", "Library"), 2); + cbm_store_close(consumer); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} + +TEST(cross_repo_maven_very_long_pom_paths_do_not_truncate) { +#ifdef _WIN32 + SKIP_PLATFORM("Windows does not reliably allow 512+ character fixture paths"); +#endif + const char *provider_pom = "4.0.0" + "com.example.platform" + "shared-library" + "1.0.0"; + const char *consumer_pom = "4.0.0" + "appconsumer"; + const char *dep_pom = "4.0.0" + "appconsumer-module" + "com.example.platform" + "shared-library1.0.0" + ""; + cross_maven_fixture_t fx; + ASSERT_EQ(setup_cross_maven_fixture(&fx, provider_pom, consumer_pom), 0); + + const char *common_path = + "modules/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/" + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/" + "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc/" + "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd/" + "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee/" + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff/" + "gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg/" + "hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh/"; + char dep_path_a[640]; + char 
dep_path_b[640]; + snprintf(dep_path_a, sizeof(dep_path_a), "%sdep-a/pom.xml", common_path); + snprintf(dep_path_b, sizeof(dep_path_b), "%sdep-b/pom.xml", common_path); + ASSERT_GT((int)strlen(dep_path_a), 512); + ASSERT_EQ(strncmp(dep_path_a, dep_path_b, 512), 0); + + int write_rc = th_write_file(TH_PATH(fx.consumer_root, dep_path_a), dep_pom); + if (write_rc != 0) { + cleanup_cross_maven_fixture(&fx); + } + ASSERT_EQ(write_rc, 0); + write_rc = th_write_file(TH_PATH(fx.consumer_root, dep_path_b), dep_pom); + if (write_rc != 0) { + cleanup_cross_maven_fixture(&fx); + } + ASSERT_EQ(write_rc, 0); + + cbm_store_t *consumer = cbm_store_open_path(fx.consumer_db); + ASSERT_NOT_NULL(consumer); + cbm_node_t dep_pom_a = {.project = "consumer", + .label = "File", + .name = "pom.xml", + .qualified_name = "consumer.very.long.a.pom", + .file_path = dep_path_a, + .start_line = 1, + .end_line = 1, + .properties_json = "{}"}; + cbm_node_t dep_pom_b = {.project = "consumer", + .label = "File", + .name = "pom.xml", + .qualified_name = "consumer.very.long.b.pom", + .file_path = dep_path_b, + .start_line = 1, + .end_line = 1, + .properties_json = "{}"}; + ASSERT_GT(cbm_store_upsert_node(consumer, &dep_pom_a), 0); + ASSERT_GT(cbm_store_upsert_node(consumer, &dep_pom_b), 0); + cbm_store_close(consumer); + + const char *targets[] = {"provider"}; + cbm_cross_repo_result_t result = cbm_cross_repo_match("consumer", targets, 1); + ASSERT_EQ(result.library_edges, 2); + + consumer = cbm_store_open_path(fx.consumer_db); + ASSERT_NOT_NULL(consumer); + ASSERT_EQ(count_nodes_by_label(consumer, "consumer", "Library"), 2); + ASSERT_TRUE(edge_props_are_valid_json(consumer, "consumer", "CROSS_LIBRARY_DEPENDS_ON")); + cbm_store_close(consumer); + + cleanup_cross_maven_fixture(&fx); + PASS(); +} + TEST(usages_creates_edges) { /* Port of TestPassUsagesCreatesEdges. * Go source with callback reference → USAGE edge. 
*/ @@ -5853,6 +6688,22 @@ SUITE(pipeline) { /* Incremental reindex */ /* FastAPI Depends edge tracking (PR #66 port) */ RUN_TEST(pipeline_fastapi_depends_edges); + /* Cross-repo library dependency linking */ + RUN_TEST(cross_repo_project_list_initial_alloc_failure_returns_empty); + RUN_TEST(cross_repo_maven_failed_growth_preserves_capacity); + RUN_TEST(cross_repo_maven_growth_rejects_byte_overflow_before_realloc); + RUN_TEST(cross_repo_maven_dependency_creates_library_edges); + RUN_TEST(cross_repo_maven_dependency_escapes_library_edge_props); + RUN_TEST(cross_repo_maven_dependency_management_does_not_create_library_edge); + RUN_TEST(cross_repo_maven_commented_dependency_does_not_create_library_edge); + RUN_TEST(cross_repo_maven_plugin_dependency_does_not_create_library_edge); + RUN_TEST(cross_repo_maven_cleanup_preserves_unrelated_nodes); + RUN_TEST(cross_repo_maven_provider_rerun_preserves_incoming_used_by); + RUN_TEST(cross_repo_maven_removed_dependency_clears_provider_used_by); + RUN_TEST(cross_repo_maven_long_coordinates_do_not_collide); + RUN_TEST(cross_repo_maven_fixture_restores_cache_dir); + RUN_TEST(cross_repo_maven_long_references_do_not_collide); + RUN_TEST(cross_repo_maven_very_long_pom_paths_do_not_truncate); /* Incremental */ RUN_TEST(incremental_full_then_noop); RUN_TEST(incremental_detects_changed_file);