diff --git a/Makefile.cbm b/Makefile.cbm index 3ff50b81..729ef908 100644 --- a/Makefile.cbm +++ b/Makefile.cbm @@ -129,6 +129,8 @@ EXTRACTION_SRCS = \ $(CBM_DIR)/extract_k8s.c \ $(CBM_DIR)/helpers.c \ $(CBM_DIR)/lang_specs.c \ + $(CBM_DIR)/macro_table.c \ + $(CBM_DIR)/iris_export_xml.c \ $(CBM_DIR)/service_patterns.c # LSP resolvers (compiled as one unit via lsp_all.c) @@ -200,6 +202,7 @@ PIPELINE_SRCS = \ src/pipeline/pass_semantic_edges.c \ src/pipeline/pass_complexity.c \ src/pipeline/pass_cross_repo.c \ + src/pipeline/pass_ensemble_routing.c \ src/pipeline/artifact.c \ src/pipeline/pass_pkgmap.c diff --git a/internal/cbm/cbm.c b/internal/cbm/cbm.c index d611f186..42ae4398 100644 --- a/internal/cbm/cbm.c +++ b/internal/cbm/cbm.c @@ -497,6 +497,15 @@ static int count_params_from_signature(const char *sig) { CBMFileResult *cbm_extract_file(const char *source, int source_len, CBMLanguage language, const char *project, const char *rel_path, int64_t timeout_micros, const char **extra_defines, const char **include_paths) { + return cbm_extract_file_ex(source, source_len, language, project, rel_path, timeout_micros, + extra_defines, include_paths, NULL, NULL); +} + +CBMFileResult *cbm_extract_file_ex(const char *source, int source_len, CBMLanguage language, + const char *project, const char *rel_path, + int64_t timeout_micros, const char **extra_defines, + const char **include_paths, const CBMMacroTable *macro_table, + const CBMReturnTypeTable *return_type_table) { // Allocate result on heap (arena inside for all string data) enum { SINGLE = 1 }; CBMFileResult *result = (CBMFileResult *)calloc(SINGLE, sizeof(CBMFileResult)); @@ -580,6 +589,8 @@ CBMFileResult *cbm_extract_file(const char *source, int source_len, CBMLanguage .rel_path = rel_path, .module_qn = result->module_qn, .root = root, + .macro_table = macro_table, + .return_type_table = return_type_table, }; // Run extractors: defs + imports use separate walks (unique recursion patterns), diff --git 
a/internal/cbm/cbm.h b/internal/cbm/cbm.h index cc3607ee..9bbc60c7 100644 --- a/internal/cbm/cbm.h +++ b/internal/cbm/cbm.h @@ -164,12 +164,15 @@ typedef enum { CBM_LANG_APEX, CBM_LANG_SOQL, CBM_LANG_SOSL, - CBM_LANG_KUSTOMIZE, // kustomization.yaml — Kubernetes overlay tool - CBM_LANG_K8S, // Generic Kubernetes manifest (apiVersion: detected) - CBM_LANG_PINE, // Pine Script (TradingView indicator / strategy language) - CBM_LANG_QML, // Qt QML (Qt Modeling Language — declarative UI + embedded JS) - CBM_LANG_CFSCRIPT, // CFML script dialect (.cfc components — Lucee/ColdFusion) - CBM_LANG_CFML, // CFML tag dialect (.cfm templates — Lucee/ColdFusion) + CBM_LANG_KUSTOMIZE, // kustomization.yaml — Kubernetes overlay tool + CBM_LANG_K8S, // Generic Kubernetes manifest (apiVersion: detected) + CBM_LANG_PINE, // Pine Script (TradingView indicator / strategy language) + CBM_LANG_QML, // Qt QML (Qt Modeling Language — declarative UI + embedded JS) + CBM_LANG_CFSCRIPT, // CFML script dialect (.cfc components — Lucee/ColdFusion) + CBM_LANG_CFML, // CFML tag dialect (.cfm templates — Lucee/ColdFusion) + CBM_LANG_OBJECTSCRIPT_UDL, // InterSystems ObjectScript UDL (.cls class files) + CBM_LANG_OBJECTSCRIPT_ROUTINE, // InterSystems ObjectScript routine (.mac/.int/.rtn/.inc) + CBM_LANG_OBJECTSCRIPT_EXPORT, // InterSystems Studio Export XML () CBM_LANG_COUNT } CBMLanguage; @@ -485,6 +488,24 @@ typedef struct { int count; } CBMStringConstantMap; +// Forward declaration: ObjectScript macro table (defined in macro_table.h). +typedef struct CBMMacroTable CBMMacroTable; + +// Method-return-type table for ObjectScript variable type inference. Populated +// from definition nodes (method QN -> declared return type) so a later +// `Set x = obj.Method()` can resolve x's class. 
+#define CBM_RETURN_TYPE_TABLE_CAP 2048 + +typedef struct { + const char *method_qn; + const char *return_type; +} CBMReturnTypeEntry; + +typedef struct { + CBMReturnTypeEntry entries[CBM_RETURN_TYPE_TABLE_CAP]; + int count; +} CBMReturnTypeTable; + typedef struct { CBMArena *arena; CBMFileResult *result; @@ -495,9 +516,11 @@ typedef struct { const char *rel_path; const char *module_qn; TSNode root; - EFCache ef_cache; // enclosing function cache - const char *enclosing_class_qn; // for nested class QN computation - CBMStringConstantMap string_constants; // module-level NAME = "value" pairs + EFCache ef_cache; // enclosing function cache + const char *enclosing_class_qn; // for nested class QN computation + CBMStringConstantMap string_constants; // module-level NAME = "value" pairs + const CBMMacroTable *macro_table; // ObjectScript $$$macro table (NULL if none) + const CBMReturnTypeTable *return_type_table; // ObjectScript method return types (NULL if none) } CBMExtractCtx; // --- Public API --- @@ -524,6 +547,18 @@ CBMFileResult *cbm_extract_file(const char *source, int source_len, CBMLanguage const char **include_paths // NULL-terminated, or NULL ); +// Pipeline-internal variant of cbm_extract_file() carrying ObjectScript +// per-project tables (macro table + method-return-type table). The public +// cbm_extract_file() is a thin wrapper that passes NULL, NULL for both. +CBMFileResult *cbm_extract_file_ex( + const char *source, int source_len, CBMLanguage language, const char *project, + const char *rel_path, int64_t timeout_micros, + const char **extra_defines, // NULL-terminated, or NULL + const char **include_paths, // NULL-terminated, or NULL + const CBMMacroTable *macro_table, // ObjectScript macros, or NULL + const CBMReturnTypeTable *return_type_table // OS return types, or NULL +); + // Free all memory associated with a result. 
void cbm_free_result(CBMFileResult *result); diff --git a/internal/cbm/extract_calls.c b/internal/cbm/extract_calls.c index 98b924b8..f506af98 100644 --- a/internal/cbm/extract_calls.c +++ b/internal/cbm/extract_calls.c @@ -2,6 +2,7 @@ #include "arena.h" // CBMArena, cbm_arena_sprintf #include "helpers.h" #include "lang_specs.h" +#include "macro_table.h" #include "extract_unified.h" #include "foundation/constants.h" #include "extract_node_stack.h" @@ -592,6 +593,60 @@ static char *extract_callee_lang_specific(CBMArena *a, TSNode node, const char * if (lang == CBM_LANG_SWIFT) { return extract_swift_callee(a, node, source, nk); } + if (lang == CBM_LANG_OBJECTSCRIPT_UDL || lang == CBM_LANG_OBJECTSCRIPT_ROUTINE) { + // ##class(Pkg.Class).Method() -> "Pkg.Class.Method" + if (strcmp(nk, "class_method_call") == 0) { + TSNode class_ref = cbm_find_child_by_kind(node, "class_ref"); + TSNode method_name = cbm_find_child_by_kind(node, "method_name"); + if (!ts_node_is_null(class_ref) && !ts_node_is_null(method_name)) { + TSNode cname = cbm_find_child_by_kind(class_ref, "class_name"); + if (ts_node_is_null(cname)) { + return NULL; + } + char *cls = cbm_node_text(a, cname, source); + if (!cls || !cls[0]) { + return NULL; + } + TSNode mname_ident = ts_node_named_child_count(method_name) > 0 + ? ts_node_named_child(method_name, 0) + : (TSNode){0}; + if (ts_node_is_null(mname_ident)) { + return cls; + } + char *meth = cbm_node_text(a, mname_ident, source); + if (!meth || !meth[0]) { + return cls; + } + return cbm_arena_sprintf(a, "%s.%s", cls, meth); + } + return NULL; + } + // $$label^routine extrinsic / routine tag call -> the line_ref text + if (strcmp(nk, "routine_tag_call") == 0) { + TSNode line_ref = cbm_find_child_by_kind(node, "line_ref"); + if (!ts_node_is_null(line_ref)) { + return cbm_node_text(a, line_ref, source); + } + return NULL; + } + // $$$Macro(...) 
-> raw "$$$Name" callee (expanded later in handle_calls) + if (strcmp(nk, "macro") == 0) { + char *raw = cbm_node_text(a, node, source); + if (!raw || raw[0] != '$' || raw[1] != '$' || raw[2] != '$') { + return NULL; + } + char *name_start = raw + 3; + char *paren = strchr(name_start, '('); + if (paren) { + *paren = '\0'; + } + if (!name_start[0]) { + return NULL; + } + return cbm_arena_sprintf(a, "$$$%s", name_start); + } + return NULL; + } return extract_scripting_callee(a, node, source, lang, nk); } @@ -1120,6 +1175,72 @@ static void extract_jsx_component_ref(CBMExtractCtx *ctx, TSNode node, const cha } } +// ObjectScript: resolve `var.Method(...)` / `..Property.Method(...)` instance +// calls against the per-method variable type map. Returns arena "Class.Method" +// or NULL if the receiver's type is unknown. +static char *resolve_objectscript_instance_call(CBMArena *a, TSNode node, const char *source, + os_type_map_t *type_map) { + TSNode receiver = {0}; + TSNode oref = {0}; + const char *nk_first = NULL; + for (uint32_t i = 0; i < ts_node_named_child_count(node); i++) { + TSNode child = ts_node_named_child(node, i); + const char *ck = ts_node_type(child); + if (strcmp(ck, "lvn") == 0 || strcmp(ck, "variable") == 0) { + receiver = child; + } else if (strcmp(ck, "relative_dot_property") == 0) { + receiver = child; + nk_first = "relative_dot_property"; + } else if (strcmp(ck, "oref_method") == 0) { + oref = child; + } + } + if (ts_node_is_null(oref)) { + return NULL; + } + TSNode method_name_node = cbm_find_child_by_kind(oref, "method_name"); + if (ts_node_is_null(method_name_node)) { + return NULL; + } + TSNode mn_ident = ts_node_named_child_count(method_name_node) > 0 + ? 
ts_node_named_child(method_name_node, 0) + : (TSNode){0}; + if (ts_node_is_null(mn_ident)) { + return NULL; + } + char *method = cbm_node_text(a, mn_ident, source); + if (!method || !method[0]) { + return NULL; + } + if (ts_node_is_null(receiver)) { + return NULL; + } + char *var_text = NULL; + if (nk_first && strcmp(nk_first, "relative_dot_property") == 0) { + TSNode prop_name = cbm_find_child_by_kind(receiver, "member_name"); + if (!ts_node_is_null(prop_name)) { + char *pname = cbm_node_text(a, prop_name, source); + if (pname && pname[0]) { + var_text = cbm_arena_sprintf(a, "..%s", pname); + } + } + if (!var_text) { + var_text = cbm_node_text(a, receiver, source); + } + } else { + var_text = cbm_node_text(a, receiver, source); + } + if (!var_text || !var_text[0]) { + return NULL; + } + for (int i = 0; i < type_map->count; i++) { + if (strcasecmp(type_map->entries[i].var_name, var_text) == 0) { + return cbm_arena_sprintf(a, "%s.%s", type_map->entries[i].class_name, method); + } + } + return NULL; +} + void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, WalkState *state) { if (!spec->call_node_types || !spec->call_node_types[0]) { return; @@ -1127,6 +1248,56 @@ void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, Walk if (cbm_kind_in_set(node, spec->call_node_types)) { char *callee = extract_callee_name(ctx->arena, node, ctx->source, ctx->language); + + // ObjectScript: var.Method() / ..Property.Method() instance dispatch. + if (!callee && + (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) && + strcmp(ts_node_type(node), "instance_method_call") == 0) { + callee = resolve_objectscript_instance_call(ctx->arena, node, ctx->source, + &state->os_type_map); + } + + // ObjectScript: ..Method() oref self-call resolves against the enclosing class. 
+ if (!callee && + (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) && + strcmp(ts_node_type(node), "relative_dot_method") == 0 && state->enclosing_class_qn && + state->enclosing_class_qn[0]) { + TSNode oref = cbm_find_child_by_kind(node, "oref_method"); + if (!ts_node_is_null(oref)) { + TSNode mname_node = cbm_find_child_by_kind(oref, "method_name"); + if (!ts_node_is_null(mname_node)) { + TSNode ident = ts_node_named_child_count(mname_node) > 0 + ? ts_node_named_child(mname_node, 0) + : (TSNode){0}; + if (!ts_node_is_null(ident)) { + char *mname = cbm_node_text(ctx->arena, ident, ctx->source); + if (mname && mname[0]) { + callee = cbm_arena_sprintf(ctx->arena, "%s.%s", + state->enclosing_class_qn, mname); + } + } + } + } + } + + // ObjectScript: expand a $$$Macro callee via the macro table. + if (callee && callee[0] == '$' && callee[1] == '$' && callee[2] == '$' && + ctx->macro_table) { + const char *macro_name = callee + 3; + const CBMMacroEntry *entry = cbm_macro_table_find(ctx->macro_table, macro_name); + if (entry) { + if (entry->resolved_callee) { + callee = cbm_arena_strdup(ctx->arena, entry->resolved_callee); + } else if (entry->expansion) { + callee = cbm_macro_extract_callee(ctx->arena, entry->expansion); + } else { + callee = NULL; + } + } + } + if (callee && callee[0] && !cbm_is_keyword(callee, ctx->language)) { CBMCall call = {0}; call.callee_name = callee; @@ -1136,12 +1307,47 @@ void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, Walk call.start_line = (int)ts_node_start_point(node).row + TS_LINE_OFFSET; TSNode args = ts_node_child_by_field_name(node, TS_FIELD("arguments")); + // ObjectScript stores args under oref_method/method_args, not the + // generic "arguments" field. 
+ if (ts_node_is_null(args) && (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE)) { + TSNode oref = cbm_find_child_by_kind(node, "oref_method"); + if (!ts_node_is_null(oref)) { + args = cbm_find_child_by_kind(oref, "method_args"); + } + if (ts_node_is_null(args)) { + args = cbm_find_child_by_kind(node, "method_args"); + } + } if (!ts_node_is_null(args)) { call.first_string_arg = extract_url_or_topic_arg(ctx, args); if (call.first_string_arg && call.first_string_arg[0] == '/') { call.second_arg_name = extract_handler_arg(ctx, args); } - extract_call_args(ctx, args, &call); + if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) { + for (uint32_t ai = 0; + ai < ts_node_named_child_count(args) && call.arg_count < CBM_MAX_CALL_ARGS; + ai++) { + TSNode achild = ts_node_named_child(args, ai); + const char *ack = ts_node_type(achild); + if (strcmp(ack, "bracket") == 0) { + continue; + } + if (strcmp(ack, "method_arg") != 0) { + continue; + } + CBMCallArg *ca = &call.args[call.arg_count]; + memset(ca, 0, sizeof(*ca)); + ca->index = call.arg_count; + ca->expr = cbm_node_text(ctx->arena, achild, ctx->source); + if (ca->expr && ca->expr[0]) { + call.arg_count++; + } + } + } else { + extract_call_args(ctx, args, &call); + } } cbm_calls_push(&ctx->result->calls, ctx->arena, call); diff --git a/internal/cbm/extract_defs.c b/internal/cbm/extract_defs.c index 913268d8..323de9d4 100644 --- a/internal/cbm/extract_defs.c +++ b/internal/cbm/extract_defs.c @@ -8,6 +8,7 @@ #include "semantic/ast_profile.h" #include "tree_sitter/api.h" // TSNode, ts_node_* #include // uint32_t +#include // snprintf (ObjectScript storage/trigger sidecars) #include #include @@ -92,7 +93,9 @@ static char *extract_body_ident_tokens(CBMExtractCtx *ctx, TSNode body) { if (nc == 0) { const char *k = ts_node_type(nd); if (strcmp(k, "identifier") == 0 || strcmp(k, "field_identifier") == 0 || - strcmp(k, 
"property_identifier") == 0) { + strcmp(k, "property_identifier") == 0 || + strcmp(k, "objectscript_identifier") == 0 || + strcmp(k, "identifier_segment_immediate") == 0) { uint32_t s = ts_node_start_byte(nd); int len = (int)(ts_node_end_byte(nd) - s); if (len > 0 && len < CBM_SZ_64 && s < (uint32_t)ctx->source_len) { @@ -693,6 +696,25 @@ static TSNode resolve_func_name(TSNode node, CBMLanguage lang) { return null_node; } + // ObjectScript routine tag is its own name node. + if (lang == CBM_LANG_OBJECTSCRIPT_ROUTINE && strcmp(kind, "tag") == 0) { + return node; + } + // ObjectScript method/classmethod: name lives under method_definition -> + // method_name -> first named child. + if (lang == CBM_LANG_OBJECTSCRIPT_UDL && + (strcmp(kind, "method") == 0 || strcmp(kind, "classmethod") == 0)) { + TSNode mdef = cbm_find_child_by_kind(node, "method_definition"); + if (!ts_node_is_null(mdef)) { + TSNode mname = cbm_find_child_by_kind(mdef, "method_name"); + if (!ts_node_is_null(mname) && ts_node_named_child_count(mname) > 0) { + return ts_node_named_child(mname, 0); + } + } + TSNode null_node = {0}; + return null_node; + } + TSNode name = func_name_node(node); if (lang == CBM_LANG_R && strcmp(kind, "function_definition") == 0) { @@ -1952,6 +1974,37 @@ static const char **extract_julia_base_classes(CBMArena *a, TSNode node, const c static const char **extract_base_classes(CBMArena *a, TSNode node, const char *source, CBMLanguage lang) { + // ObjectScript: `Class X Extends (A, B)` — bases are class_name children of + // the class_extends node. 
+ if (lang == CBM_LANG_OBJECTSCRIPT_UDL) { + TSNode ext = cbm_find_child_by_kind(node, "class_extends"); + if (!ts_node_is_null(ext)) { + const char *bases[MAX_BASES]; + int base_count = 0; + uint32_t nc = ts_node_named_child_count(ext); + for (uint32_t i = 0; i < nc && base_count < MAX_BASES_MINUS_1; i++) { + TSNode ch = ts_node_named_child(ext, i); + if (strcmp(ts_node_type(ch), "class_name") == 0) { + char *base = cbm_node_text(a, ch, source); + if (base && base[0]) { + bases[base_count++] = base; + } + } + } + if (base_count > 0) { + const char **result = + (const char **)cbm_arena_alloc(a, (base_count + 1) * sizeof(const char *)); + if (result) { + for (int i = 0; i < base_count; i++) { + result[i] = bases[i]; + } + result[base_count] = NULL; + return result; + } + } + } + return NULL; + } // Languages whose heritage is not exposed via a tree-sitter field need // dedicated walkers; the generic field/keyword path mis-captures them. if (lang == CBM_LANG_TYPESCRIPT || lang == CBM_LANG_TSX) { @@ -2977,6 +3030,10 @@ static void extract_class_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec if (ts_node_is_null(name_node) && ctx->language == CBM_LANG_OBJC) { name_node = cbm_find_child_by_kind(node, "identifier"); } + // ObjectScript UDL: class name is a `class_name` child (no "name" field). + if (ts_node_is_null(name_node) && ctx->language == CBM_LANG_OBJECTSCRIPT_UDL) { + name_node = cbm_find_child_by_kind(node, "class_name"); + } // Swift and newer tree-sitter-kotlin: class/object name is a type_identifier // child (no "name" field). if (ts_node_is_null(name_node) && @@ -3473,6 +3530,22 @@ static TSNode resolve_method_name(TSNode child, CBMLanguage lang) { return cbm_find_child_by_kind(child, "identifier"); } + // ObjectScript method/classmethod: name under method_definition->method_name. 
+ if (lang == CBM_LANG_OBJECTSCRIPT_UDL && + (strcmp(ck, "method") == 0 || strcmp(ck, "classmethod") == 0)) { + TSNode mdef = cbm_find_child_by_kind(child, "method_definition"); + if (!ts_node_is_null(mdef)) { + TSNode mname = cbm_find_child_by_kind(mdef, "method_name"); + if (!ts_node_is_null(mname) && ts_node_named_child_count(mname) > 0) { + return ts_node_named_child(mname, 0); + } + } + } + // ObjectScript query member. + if (lang == CBM_LANG_OBJECTSCRIPT_UDL && strcmp(ck, "query") == 0) { + return cbm_find_child_by_kind(child, "query_name"); + } + if (strcmp(ck, "arrow_function") == 0) { return resolve_arrow_func_name(child); } @@ -3506,6 +3579,11 @@ static void push_method_def(CBMExtractCtx *ctx, TSNode child, const char *class_ def.is_exported = cbm_is_exported(name, ctx->language); TSNode params = ts_node_child_by_field_name(child, TS_FIELD("parameters")); + // ObjectScript exposes the parameter list under a `parameter_list` field. + if (ts_node_is_null(params) && (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE)) { + params = ts_node_child_by_field_name(child, TS_FIELD("parameter_list")); + } if (!ts_node_is_null(params)) { def.signature = cbm_node_text(a, params, ctx->source); def.param_types = extract_param_types(a, params, ctx->source, ctx->language); @@ -3523,6 +3601,22 @@ static void push_method_def(CBMExtractCtx *ctx, TSNode child, const char *class_ } } + // ObjectScript: return type is method_definition -> return_type -> typename. 
+ if (!def.return_type && (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE)) { + TSNode mdef = cbm_find_child_by_kind(child, "method_definition"); + if (ts_node_is_null(mdef)) { + mdef = child; + } + TSNode rt_node = cbm_find_child_by_kind(mdef, "return_type"); + if (!ts_node_is_null(rt_node)) { + TSNode tname = cbm_find_child_by_kind(rt_node, "typename"); + if (!ts_node_is_null(tname)) { + def.return_type = cbm_node_text(a, tname, ctx->source); + } + } + } + // C++: trailing return type (auto method() -> Type) if (def.return_type && strcmp(def.return_type, "auto") == 0 && (ctx->language == CBM_LANG_CPP || ctx->language == CBM_LANG_CUDA)) { @@ -3604,6 +3698,19 @@ static void extract_class_methods(CBMExtractCtx *ctx, TSNode class_node, const c method_node = def; } + // ObjectScript UDL wraps each method/classmethod in a class_statement. + if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL && + strcmp(ts_node_type(child), "class_statement") == 0) { + if (ts_node_named_child_count(child) == 0) { + continue; + } + TSNode inner = ts_node_named_child(child, 0); + if (!cbm_kind_in_set(inner, spec->function_node_types)) { + continue; + } + method_node = inner; + } + if (!cbm_kind_in_set(method_node, spec->function_node_types)) { continue; } @@ -4762,6 +4869,14 @@ static void extract_class_fields(CBMExtractCtx *ctx, TSNode class_node, const ch uint32_t count = ts_node_named_child_count(body); for (uint32_t i = 0; i < count; i++) { TSNode child = ts_node_named_child(body, i); + + // ObjectScript UDL wraps each member in a class_statement node. 
+ if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL && + strcmp(ts_node_type(child), "class_statement") == 0 && + ts_node_named_child_count(child) > 0) { + child = ts_node_named_child(child, 0); + } + if (!cbm_kind_in_set(child, spec->field_node_types)) { continue; } @@ -4770,6 +4885,211 @@ static void extract_class_fields(CBMExtractCtx *ctx, TSNode class_node, const ch continue; } + // ObjectScript UDL member extraction. property/parameter -> Variable; + // index/trigger/xdata/storage/foreignkey -> labelled members with + // storage-XML and trigger-body sidecars. + if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL) { + if (strcmp(ts_node_type(child), "property") == 0 || + strcmp(ts_node_type(child), "parameter") == 0) { + TSNode pname = cbm_find_child_by_kind(child, "property_name"); + if (ts_node_is_null(pname)) { + pname = cbm_find_child_by_kind(child, "parameter_name"); + } + if (!ts_node_is_null(pname) && ts_node_named_child_count(pname) > 0) { + TSNode ident = ts_node_named_child(pname, 0); + char *pn = cbm_node_text(a, ident, ctx->source); + if (pn && pn[0]) { + CBMDefinition pdef; + memset(&pdef, 0, sizeof(pdef)); + pdef.name = pn; + pdef.qualified_name = cbm_arena_sprintf(a, "%s.%s", class_qn, pn); + pdef.label = "Variable"; + pdef.file_path = ctx->rel_path; + pdef.parent_class = class_qn; + pdef.start_line = ts_node_start_point(child).row + TS_LINE_OFFSET; + pdef.end_line = ts_node_end_point(child).row + TS_LINE_OFFSET; + cbm_defs_push(&ctx->result->defs, a, pdef); + } + } + continue; + } + + const char *ntype = ts_node_type(child); + const char *name_child_kind = NULL; + const char *member_label = NULL; + if (strcmp(ntype, "index") == 0) { + name_child_kind = "index_name"; + member_label = "Index"; + } else if (strcmp(ntype, "trigger") == 0) { + name_child_kind = "trigger_name"; + member_label = "Trigger"; + } else if (strcmp(ntype, "xdata") == 0) { + name_child_kind = "xdata_name"; + member_label = "XData"; + } else if (strcmp(ntype, "storage") == 0) { + 
name_child_kind = "storage_name"; + member_label = "Storage"; + } else if (strcmp(ntype, "foreignkey") == 0) { + name_child_kind = "foreignkey_name"; + member_label = "Variable"; + } + + if (name_child_kind) { + TSNode nname = cbm_find_child_by_kind(child, name_child_kind); + if (!ts_node_is_null(nname)) { + char *mn = cbm_node_text(a, nname, ctx->source); + if (mn && mn[0]) { + CBMDefinition mdef; + memset(&mdef, 0, sizeof(mdef)); + mdef.name = mn; + mdef.qualified_name = cbm_arena_sprintf(a, "%s.%s", class_qn, mn); + mdef.label = member_label; + mdef.file_path = ctx->rel_path; + mdef.parent_class = class_qn; + mdef.start_line = ts_node_start_point(child).row + TS_LINE_OFFSET; + mdef.end_line = ts_node_end_point(child).row + TS_LINE_OFFSET; + + if (strcmp(member_label, "Storage") == 0) { + TSNode sbody = cbm_find_child_by_kind(child, "storage_body"); + if (!ts_node_is_null(sbody)) { + char *xml = cbm_node_text(a, sbody, ctx->source); + if (xml) { + char props[CBM_SZ_2K]; + int pos = snprintf(props, sizeof(props), "{"); + static const struct { + const char *tag; + const char *key; + } kv[] = {{"ExtentSize", "extent_size"}, + {"DataLocation", "data_global"}, + {"IdLocation", "id_global"}, + {"IndexLocation", "index_global"}, + {"StreamLocation", "stream_global"}, + {"Type", "storage_type"}, + {NULL, NULL}}; + bool first = true; + for (int ki = 0; kv[ki].tag; ki++) { + char open[64], close[64], buf[256]; + snprintf(open, sizeof(open), "<%s>", kv[ki].tag); + snprintf(close, sizeof(close), "", kv[ki].tag); + const char *s = strstr(xml, open); + if (!s) { + continue; + } + s += strlen(open); + const char *e = strstr(s, close); + if (!e) { + continue; + } + size_t vlen = (size_t)(e - s); + if (vlen >= sizeof(buf)) { + vlen = sizeof(buf) - 1; + } + memcpy(buf, s, vlen); + buf[vlen] = '\0'; + char esc[300]; + int ei = 0; + for (size_t ci = 0; ci < vlen && ei < (int)sizeof(esc) - 2; + ci++) { + if (buf[ci] == '"' || buf[ci] == '\\') { + esc[ei++] = '\\'; + } + esc[ei++] = 
buf[ci]; + } + esc[ei] = '\0'; + if (pos < 0 || pos >= (int)sizeof(props) - 1) { + break; // buffer full — stop appending + } + pos += snprintf(props + pos, sizeof(props) - (size_t)pos, + "%s\"%s\":\"%s\"", first ? "" : ",", + kv[ki].key, esc); + if (pos >= (int)sizeof(props)) { + pos = (int)sizeof(props) - 1; // truncated + } + first = false; + } + const char *sql_tag = ""; + const char *sql_end = ""; + char sql_map_buf[512]; + int smi = 0; + const char *sp = xml; + bool sql_first = true; + while ((sp = strstr(sp, sql_tag)) != NULL) { + sp += strlen(sql_tag); + const char *ep = strstr(sp, sql_end); + if (!ep) { + break; + } + size_t glen = (size_t)(ep - sp); + if (smi + (int)glen + 2 < (int)sizeof(sql_map_buf) - 1) { + if (!sql_first) { + sql_map_buf[smi++] = ' '; + } + memcpy(sql_map_buf + smi, sp, glen); + smi += (int)glen; + sql_first = false; + } + sp = ep + strlen(sql_end); + } + sql_map_buf[smi] = '\0'; + if (smi > 0 && pos >= 0 && pos < (int)sizeof(props) - 1) { + pos += snprintf(props + pos, sizeof(props) - (size_t)pos, + "%s\"sql_map_globals\":\"%s\"", + first ? 
"" : ",", sql_map_buf); + if (pos >= (int)sizeof(props)) { + pos = (int)sizeof(props) - 1; // truncated + } + first = false; + } + if (pos < (int)sizeof(props) - 1) { + props[pos++] = '}'; + props[pos] = '\0'; + } + if (!first) { + mdef.docstring = cbm_arena_strdup(a, props); + } + } + } + } + + if (strcmp(member_label, "Trigger") == 0) { + TSNode tbody = cbm_find_child_by_kind(child, "core_trigger"); + if (ts_node_is_null(tbody)) { + tbody = cbm_find_child_by_kind(child, "external_trigger"); + } + if (!ts_node_is_null(tbody)) { + mdef.body_tokens = extract_body_ident_tokens(ctx, tbody); + char *raw = cbm_node_text(a, tbody, ctx->source); + if (raw && raw[0]) { + char esc[CBM_SZ_512]; + int ei = 0; + for (int ci = 0; raw[ci] && ei < (int)sizeof(esc) - 3; ci++) { + if (raw[ci] == '"' || raw[ci] == '\\') { + esc[ei++] = '\\'; + } else if (raw[ci] == '\n') { + esc[ei++] = '\\'; + esc[ei++] = 'n'; + continue; + } else if (raw[ci] == '\r') { + continue; + } + esc[ei++] = raw[ci]; + } + esc[ei] = '\0'; + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), "{\"trigger_body\":\"%s\"}", + esc); + mdef.docstring = cbm_arena_strdup(a, props); + } + } + } + + cbm_defs_push(&ctx->result->defs, a, mdef); + } + } + continue; + } + } + /* Locate the field's "type" + name node. 
Two shapes: * - direct (Java/Go/Rust/C/C++): * field_declaration .type=identifier .declarator=variable_declarator(.name) @@ -4926,6 +5246,9 @@ static const char *compute_class_qn(CBMExtractCtx *ctx, TSNode node, const char if (ts_node_is_null(name_node) && ctx->language == CBM_LANG_SWIFT) { name_node = cbm_find_child_by_kind(node, "type_identifier"); } + if (ts_node_is_null(name_node) && ctx->language == CBM_LANG_OBJECTSCRIPT_UDL) { + name_node = cbm_find_child_by_kind(node, "class_name"); + } if (!ts_node_is_null(name_node)) { char *cname = cbm_node_text(ctx->arena, name_node, ctx->source); if (cname && cname[0]) { diff --git a/internal/cbm/extract_unified.c b/internal/cbm/extract_unified.c index 7274158f..b5a6aad8 100644 --- a/internal/cbm/extract_unified.c +++ b/internal/cbm/extract_unified.c @@ -10,6 +10,7 @@ enum { MAX_INFRA_BINDINGS = 8 }; #include // uint32_t, uint8_t #include +#include // strcasecmp (ObjectScript type inference) // --- Scope stack management --- @@ -104,6 +105,225 @@ static TSNode resolve_func_name_node(TSNode node) { return name_node; } +// --- ObjectScript variable type inference (instance_method_call resolution) --- + +// Insert or update var_name -> class_name. Silent on overflow. +static void os_type_map_add(os_type_map_t *map, const char *var_name, const char *class_name) { + if (map->count >= OS_TYPE_MAP_CAP || !var_name || !class_name) { + return; + } + for (int i = 0; i < map->count; i++) { + if (strcmp(map->entries[i].var_name, var_name) == 0) { + map->entries[i].class_name = class_name; + return; + } + } + map->entries[map->count].var_name = var_name; + map->entries[map->count].class_name = class_name; + map->count++; +} + +// Locate the class_method_call inside an RHS expression (peeking through a +// couple of common ObjectScript expression container node types). 
+static TSNode find_class_method_call(TSNode root, const char *end) {
+    // `end` is part of the signature but is never read.
+    (void)end;
+    if (strcmp(ts_node_type(root), "class_method_call") == 0) {
+        return root;
+    }
+    // Wrapper node kinds probed, in this order, for a nested call.
+    static const char *containers[] = {"expression", "expr_atom", NULL};
+    for (const char **c = containers; *c; c++) {
+        TSNode inner = cbm_find_child_by_kind(root, *c);
+        if (ts_node_is_null(inner)) {
+            continue;
+        }
+        TSNode hit = cbm_find_child_by_kind(inner, "class_method_call");
+        if (!ts_node_is_null(hit)) {
+            return hit;
+        }
+        // One level deeper: <container> -> expr_atom -> class_method_call.
+        TSNode inner2 = cbm_find_child_by_kind(inner, "expr_atom");
+        if (!ts_node_is_null(inner2)) {
+            hit = cbm_find_child_by_kind(inner2, "class_method_call");
+            if (!ts_node_is_null(hit)) {
+                return hit;
+            }
+        }
+    }
+    // Last resort: a direct child of `root`. Callers test the result with
+    // ts_node_is_null().
+    return cbm_find_child_by_kind(root, "class_method_call");
+}
+
+// Look up the declared return type for `method_qn` ("Class.Method") in `table`.
+// Keys are compared case-insensitively and the first matching entry decides.
+// Returns NULL when the table or key is NULL, when nothing matches, or when the
+// matching entry carries no (or an empty) return type. Entries with a NULL
+// method_qn are skipped rather than handed to strcasecmp.
+static const char *os_lookup_return_type(const CBMReturnTypeTable *table, const char *method_qn) {
+    if (!table || !method_qn) {
+        return NULL;
+    }
+    for (int i = 0; i < table->count; i++) {
+        const CBMReturnTypeEntry *entry = &table->entries[i];
+        if (!entry->method_qn || strcasecmp(entry->method_qn, method_qn) != 0) {
+            continue;
+        }
+        if (entry->return_type && entry->return_type[0]) {
+            return entry->return_type;
+        }
+        return NULL;
+    }
+    return NULL;
+}
+
+// Inspect one argument of a `Set` command and, if it has the shape
+// `var = ##class(X).Method(...)`, record a type for `var` in the walk state:
+//   - %New / %OpenId / %Open (case-insensitive)  -> var has class X
+//   - any other method                           -> var has the return type
+//     found for "X.Method" in ctx->return_type_table (skipped if unknown)
+// Arguments of any other shape are left alone; an earlier binding for the same
+// variable is NOT removed in that case.
+static void os_infer_set_argument(CBMExtractCtx *ctx, TSNode set_arg, WalkState *state) {
+    const char *sak = ts_node_type(set_arg);
+    if (strcmp(sak, "set_argument") != 0 && strcmp(sak, "assignment") != 0) {
+        return;
+    }
+
+    // Pick the assignment target and the value expression. When several
+    // children qualify, the last one of each group is used.
+    TSNode lhs = {0};
+    TSNode rhs = {0};
+    uint32_t part_total = ts_node_named_child_count(set_arg);
+    for (uint32_t j = 0; j < part_total; j++) {
+        TSNode achild = ts_node_named_child(set_arg, j);
+        const char *ak = ts_node_type(achild);
+        if (strcmp(ak, "set_target") == 0 || strcmp(ak, "lvn") == 0 ||
+            strcmp(ak, "variable") == 0 || strcmp(ak, "glvn") == 0) {
+            lhs = achild;
+        } else if (strcmp(ak, "expression") == 0 || strcmp(ak, "expr_atom") == 0 ||
+                   strcmp(ak, "class_method_call") == 0) {
+            rhs = achild;
+        }
+    }
+    if (ts_node_is_null(lhs) || ts_node_is_null(rhs)) {
+        return;
+    }
+
+    TSNode cm_call = find_class_method_call(rhs, NULL);
+    if (ts_node_is_null(cm_call)) {
+        return;
+    }
+
+    // Method name: class_method_call -> method_name -> first named child.
+    TSNode method_name_node = cbm_find_child_by_kind(cm_call, "method_name");
+    if (ts_node_is_null(method_name_node) || ts_node_named_child_count(method_name_node) == 0) {
+        return;
+    }
+    TSNode mn_ident = ts_node_named_child(method_name_node, 0);
+    if (ts_node_is_null(mn_ident)) {
+        return;
+    }
+    char *method_text = cbm_node_text(ctx->arena, mn_ident, ctx->source);
+    if (!method_text) {
+        return;
+    }
+
+    // Class name: class_method_call -> class_ref -> class_name.
+    TSNode class_ref = cbm_find_child_by_kind(cm_call, "class_ref");
+    if (ts_node_is_null(class_ref)) {
+        return;
+    }
+    TSNode cname = cbm_find_child_by_kind(class_ref, "class_name");
+    if (ts_node_is_null(cname)) {
+        return;
+    }
+    char *cls = cbm_node_text(ctx->arena, cname, ctx->source);
+    if (!cls || !cls[0]) {
+        return;
+    }
+
+    bool is_constructor =
+        (strcasecmp(method_text, "%New") == 0 || strcasecmp(method_text, "%OpenId") == 0 ||
+         strcasecmp(method_text, "%Open") == 0);
+    if (!is_constructor) {
+        // Not a constructor-style call: the variable's class is whatever the
+        // method is declared to return, if the project table knows it.
+        if (!ctx->return_type_table) {
+            return;
+        }
+        char *method_qn = cbm_arena_sprintf(ctx->arena, "%s.%s", cls, method_text);
+        const char *declared = os_lookup_return_type(ctx->return_type_table, method_qn);
+        if (!declared) {
+            return;
+        }
+        // Copy into the arena rather than keeping a pointer into the table.
+        cls = cbm_arena_strdup(ctx->arena, declared);
+        if (!cls) {
+            return;
+        }
+    }
+
+    // Unwrap the target down to its innermost glvn / lvn, when present.
+    TSNode var_node = lhs;
+    TSNode inner = cbm_find_child_by_kind(lhs, "glvn");
+    if (!ts_node_is_null(inner)) {
+        var_node = inner;
+    }
+    inner = cbm_find_child_by_kind(var_node, "lvn");
+    if (!ts_node_is_null(inner)) {
+        var_node = inner;
+    }
+    char *var = cbm_node_text(ctx->arena, var_node, ctx->source);
+    if (!var || !var[0]) {
+        return;
+    }
+
+    os_type_map_add(&state->os_type_map, var, cls);
+}
+
+// For a `property` / `relationship` node with a declared type, record
+// "..<name>" -> <typename> and advance class_base_count to the current entry
+// count. (class_base_count is presumably what lets these entries survive a
+// method-scope reset; the reset itself is not visible here.)
+static void os_record_property_type(CBMExtractCtx *ctx, TSNode node, WalkState *state) {
+    TSNode prop_name_node = cbm_find_child_by_kind(node, "property_name");
+    if (ts_node_is_null(prop_name_node)) {
+        prop_name_node = cbm_find_child_by_kind(node, "relationship_name");
+    }
+    TSNode ret_type = cbm_find_child_by_kind(node, "return_type");
+    if (ts_node_is_null(prop_name_node) || ts_node_is_null(ret_type)) {
+        return;
+    }
+    TSNode tname = cbm_find_child_by_kind(ret_type, "typename");
+    if (ts_node_is_null(tname)) {
+        return;
+    }
+    char *pname = cbm_node_text(ctx->arena, prop_name_node, ctx->source);
+    char *ptype = cbm_node_text(ctx->arena, tname, ctx->source);
+    if (!pname || !pname[0] || !ptype || !ptype[0]) {
+        return;
+    }
+    char *dot_name = cbm_arena_sprintf(ctx->arena, "..%s", pname);
+    os_type_map_add(&state->os_type_map, dot_name, ptype);
+    state->os_type_map.class_base_count = state->os_type_map.count;
+}
+
+// ObjectScript variable type inference, driven per visited node.
+//   - `command_set`: each argument of the form `var = ##class(X).%New()`
+//     (or %OpenId / %Open, or a method whose return type is in
+//     ctx->return_type_table) maps var -> class.
+//   - `property` / `relationship`: maps `..PropName` -> declared typename.
+// Does nothing for languages other than ObjectScript UDL / routine.
+static void handle_objectscript_type_map(CBMExtractCtx *ctx, TSNode node, WalkState *state) {
+    if (ctx->language != CBM_LANG_OBJECTSCRIPT_UDL &&
+        ctx->language != CBM_LANG_OBJECTSCRIPT_ROUTINE) {
+        return;
+    }
+
+    const char *nk = ts_node_type(node);
+
+    if (strcmp(nk, "command_set") == 0) {
+        uint32_t arg_total = ts_node_named_child_count(node);
+        for (uint32_t i = 0; i < arg_total; i++) {
+            os_infer_set_argument(ctx, ts_node_named_child(node, i), state);
+        }
+    }
+
+    if (strcmp(nk, "property") == 0 || strcmp(nk, "relationship") == 0) {
+        os_record_property_type(ctx, node, state);
+    }
+}
+
+// Resolve the FQN of an ObjectScript class_definition node (via its class_name).
+// The first class_name child with non-empty text is used; returns NULL when
+// there is none.
+static const char *objectscript_get_class_name(CBMExtractCtx *ctx, TSNode node) {
+    uint32_t child_total = ts_node_named_child_count(node);
+    for (uint32_t i = 0; i < child_total; i++) {
+        TSNode child = ts_node_named_child(node, i);
+        if (strcmp(ts_node_type(child), "class_name") != 0) {
+            continue;
+        }
+        char *name = cbm_node_text(ctx->arena, child, ctx->source);
+        if (name && name[0]) {
+            return cbm_fqn_compute(ctx->arena, ctx->project, ctx->rel_path, name);
+        }
+    }
+    return NULL;
+}
+
+// Resolve the QN of an ObjectScript method/classmethod node for scope tracking.
+static const char *objectscript_get_method_qn(CBMExtractCtx *ctx, TSNode node, + const char *enclosing_class_qn) { + const char *nk = ts_node_type(node); + if (strcmp(nk, "method") != 0 && strcmp(nk, "classmethod") != 0) { + return NULL; + } + for (uint32_t i = 0; i < ts_node_named_child_count(node); i++) { + TSNode child = ts_node_named_child(node, i); + if (strcmp(ts_node_type(child), "method_definition") == 0) { + for (uint32_t j = 0; j < ts_node_named_child_count(child); j++) { + TSNode mchild = ts_node_named_child(child, j); + if (strcmp(ts_node_type(mchild), "method_name") == 0) { + if (ts_node_named_child_count(mchild) > 0) { + TSNode ident = ts_node_named_child(mchild, 0); + char *name = cbm_node_text(ctx->arena, ident, ctx->source); + if (name && name[0]) { + if (enclosing_class_qn) { + return cbm_arena_sprintf(ctx->arena, "%s.%s", enclosing_class_qn, + name); + } + return cbm_fqn_compute(ctx->arena, ctx->project, ctx->rel_path, name); + } + } + } + } + } + } + return NULL; +} + // Compute function QN for scope tracking (mirrors cbm_enclosing_func_qn logic). 
static const char *compute_func_qn(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, WalkState *state) { @@ -111,6 +331,18 @@ static const char *compute_func_qn(CBMExtractCtx *ctx, TSNode node, const CBMLan if (ctx->language == CBM_LANG_WOLFRAM) { return compute_wolfram_func_qn(ctx, node); } + if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL) { + return objectscript_get_method_qn(ctx, node, state->enclosing_class_qn); + } + if (ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) { + if (strcmp(ts_node_type(node), "tag") == 0) { + char *name = cbm_node_text(ctx->arena, node, ctx->source); + if (name && name[0]) { + return cbm_fqn_compute(ctx->arena, ctx->project, ctx->rel_path, name); + } + } + return NULL; + } TSNode name_node = resolve_func_name_node(node); if (ts_node_is_null(name_node)) { @@ -130,6 +362,9 @@ static const char *compute_func_qn(CBMExtractCtx *ctx, TSNode node, const CBMLan // Compute class QN for scope tracking. static const char *compute_class_qn(CBMExtractCtx *ctx, TSNode node) { + if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL) { + return objectscript_get_class_name(ctx, node); + } TSNode name_node = ts_node_child_by_field_name(node, TS_FIELD("name")); /* Newer tree-sitter-kotlin: class/object name is a type_identifier child. */ if (ts_node_is_null(name_node) && ctx->language == CBM_LANG_KOTLIN) { @@ -793,11 +1028,58 @@ static void push_boundary_scopes(CBMExtractCtx *ctx, TSNode node, const CBMLangS const char *fqn = compute_func_qn(ctx, node, spec, state); if (fqn) { push_scope(state, SCOPE_FUNC, depth, fqn); + // ObjectScript: entering a method resets local var types (keeping + // class-level property types) and seeds the declared parameter types. 
+ if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) { + state->os_type_map.count = state->os_type_map.class_base_count; + TSNode mdef = cbm_find_child_by_kind(node, "method_definition"); + if (ts_node_is_null(mdef)) { + mdef = node; + } + TSNode args_node = cbm_find_child_by_kind(mdef, "arguments"); + if (!ts_node_is_null(args_node)) { + for (uint32_t ai = 0; ai < ts_node_named_child_count(args_node); ai++) { + TSNode arg = ts_node_named_child(args_node, ai); + if (strcmp(ts_node_type(arg), "argument") != 0) { + continue; + } + TSNode param_name_node = {0}; + TSNode type_node = {0}; + for (uint32_t pi = 0; pi < ts_node_named_child_count(arg); pi++) { + TSNode pchild = ts_node_named_child(arg, pi); + const char *pk = ts_node_type(pchild); + if (strcmp(pk, "method_arg") == 0) { + param_name_node = pchild; + } else if (strcmp(pk, "return_type") == 0) { + type_node = cbm_find_child_by_kind(pchild, "typename"); + } + } + if (!ts_node_is_null(param_name_node) && !ts_node_is_null(type_node)) { + TSNode lvn = cbm_find_child_by_kind(param_name_node, "expr_atom"); + if (ts_node_is_null(lvn)) { + lvn = param_name_node; + } + char *pname = cbm_node_text(ctx->arena, lvn, ctx->source); + char *ptype = cbm_node_text(ctx->arena, type_node, ctx->source); + if (pname && pname[0] && ptype && ptype[0]) { + os_type_map_add(&state->os_type_map, pname, ptype); + } + } + } + } + } } } else if (spec->class_node_types && cbm_kind_in_set(node, spec->class_node_types)) { const char *cqn = compute_class_qn(ctx, node); if (cqn) { push_scope(state, SCOPE_CLASS, depth, cqn); + // ObjectScript: a new class clears the type map entirely. 
+ if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) { + state->os_type_map.count = 0; + state->os_type_map.class_base_count = 0; + } } } else if (ctx->language == CBM_LANG_RUST && strcmp(ts_node_type(node), "impl_item") == 0) { TSNode type_node = ts_node_child_by_field_name(node, TS_FIELD("type")); @@ -848,6 +1130,7 @@ void cbm_extract_unified(CBMExtractCtx *ctx) { recompute_state(&state, ctx->module_qn); handle_string_constants(ctx, node, &state); + handle_objectscript_type_map(ctx, node, &state); handle_calls(ctx, node, spec, &state); handle_usages(ctx, node, spec, &state); handle_throws(ctx, node, spec, &state); diff --git a/internal/cbm/extract_unified.h b/internal/cbm/extract_unified.h index 6e2eb60f..ac9d6b64 100644 --- a/internal/cbm/extract_unified.h +++ b/internal/cbm/extract_unified.h @@ -14,6 +14,20 @@ #define MAX_SCOPES 64 +// ObjectScript type map: variable name → class name (for instance_method_call +// resolution). Stack-allocated, per-method scope. Overflow is silent (no crash). +#define OS_TYPE_MAP_CAP 64 +typedef struct { + const char *var_name; + const char *class_name; +} os_type_entry_t; + +typedef struct { + os_type_entry_t entries[OS_TYPE_MAP_CAP]; + int count; + int class_base_count; // entries [0,class_base_count) survive method-scope resets +} os_type_map_t; + // WalkState tracks scope context during the unified cursor walk. // Replaces parent-chain walks for enclosing_func_qn, inside_call, etc. typedef struct { @@ -30,6 +44,8 @@ typedef struct { uint8_t kind; } scopes[MAX_SCOPES]; int scope_top; + + os_type_map_t os_type_map; // ObjectScript variable → type mapping } WalkState; // Per-node handler prototypes. 
Each is called once per node during the diff --git a/internal/cbm/grammar_objectscript_routine.c b/internal/cbm/grammar_objectscript_routine.c new file mode 100644 index 00000000..9006eda3 --- /dev/null +++ b/internal/cbm/grammar_objectscript_routine.c @@ -0,0 +1,4 @@ +// Vendored tree-sitter grammar: objectscript_routine +// Each grammar compiled as separate unit (conflicting static symbols). +#include "vendored/grammars/objectscript_routine/parser.c" +#include "vendored/grammars/objectscript_routine/scanner.c" diff --git a/internal/cbm/grammar_objectscript_udl.c b/internal/cbm/grammar_objectscript_udl.c new file mode 100644 index 00000000..5aea58c4 --- /dev/null +++ b/internal/cbm/grammar_objectscript_udl.c @@ -0,0 +1,4 @@ +// Vendored tree-sitter grammar: objectscript_udl +// Each grammar compiled as separate unit (conflicting static symbols). +#include "vendored/grammars/objectscript_udl/parser.c" +#include "vendored/grammars/objectscript_udl/scanner.c" diff --git a/internal/cbm/iris_export_xml.c b/internal/cbm/iris_export_xml.c new file mode 100644 index 00000000..148d4a3a --- /dev/null +++ b/internal/cbm/iris_export_xml.c @@ -0,0 +1,444 @@ +#include "iris_export_xml.h" +#include "arena.h" +#include +#include +#include +#include + +#define EXPORT_MARKER "= sz) + vl = sz - 1; + memcpy(out, v, vl); + out[vl] = '\0'; + return; + } + } +} +static const char *elem_content(const char *p, const char *end, const char *tag, char *buf, + size_t bufsz) { + buf[0] = '\0'; + char open[MAX_NAME + 2]; + snprintf(open, sizeof(open), "<%s", tag); + const char *start = find_s(p, end, open); + if (!start) + return NULL; + const char *gt = find_s(start, end, ">"); + if (!gt) + return NULL; + if (is_self_closing(start, gt)) + return gt + 1; + const char *cs = gt + 1; + if (sw(cs, end, ""); + if (!ce) + return NULL; + size_t l = (size_t)(ce - cs); + if (l >= bufsz) + l = bufsz - 1; + memcpy(buf, cs, l); + buf[l] = '\0'; + return ce + 3; + } + char close[MAX_NAME + 4]; + 
snprintf(close, sizeof(close), "", tag); + const char *cl = find_s(cs, end, close); + if (!cl) + return NULL; + size_t l = (size_t)(cl - cs); + if (l >= bufsz) + l = bufsz - 1; + memcpy(buf, cs, l); + buf[l] = '\0'; + return cl + strlen(close); +} +static bool tag_is_one(const char *p, const char *end, const char *tag) { + char buf[8]; + return elem_content(p, end, tag, buf, sizeof(buf)) && strcmp(buf, "1") == 0; +} + +static void ub_init(UdlBuf *b, CBMArena *arena) { + b->buf = (char *)cbm_arena_alloc(arena, BUF_CAP); + b->pos = 0; + b->cap = BUF_CAP; + if (b->buf) + b->buf[0] = '\0'; +} +static void ub_app(UdlBuf *b, const char *s) { + if (!b->buf || !s) + return; + size_t n = strlen(s); + if (b->pos + (int)n + 1 >= b->cap) + return; + memcpy(b->buf + b->pos, s, n); + b->pos += (int)n; + b->buf[b->pos] = '\0'; +} + +static void emit_header(UdlBuf *b, const char *cs, const char *ce) { + char name[MAX_NAME]; + extract_attr(cs, ce, "name", name, sizeof(name)); + if (!name[0]) + return; + ub_app(b, "Class "); + ub_app(b, name); + char sup[MAX_NAME * 4] = ""; + elem_content(cs, ce, "Super", sup, sizeof(sup)); + if (sup[0]) { + if (strchr(sup, ',')) { + ub_app(b, " Extends ("); + ub_app(b, sup); + ub_app(b, ")"); + } else { + ub_app(b, " Extends "); + ub_app(b, sup); + } + } + char pragma[64] = ""; + if (tag_is_one(cs, ce, "Abstract")) + strncat(pragma, "Abstract,", sizeof(pragma) - strlen(pragma) - 1); + if (tag_is_one(cs, ce, "Final")) + strncat(pragma, "Final,", sizeof(pragma) - strlen(pragma) - 1); + if (pragma[0]) { + pragma[strlen(pragma) - 1] = '\0'; + ub_app(b, " [ "); + ub_app(b, pragma); + ub_app(b, " ]"); + } + ub_app(b, "\n{\n\n"); +} + +static void emit_method(UdlBuf *b, const char *ms, const char *me) { + char mn[MAX_NAME]; + extract_attr(ms, me, "name", mn, sizeof(mn)); + if (!mn[0]) + return; + bool cm = tag_is_one(ms, me, "ClassMethod"); + char formal[1024] = ""; + elem_content(ms, me, "FormalSpec", formal, sizeof(formal)); + char ret[MAX_NAME] = ""; + 
elem_content(ms, me, "ReturnType", ret, sizeof(ret)); + char desc[4096] = ""; + elem_content(ms, me, "Description", desc, sizeof(desc)); + if (desc[0]) { + ub_app(b, "/// "); + for (char *c = desc; *c; c++) { + if (*c == '\n') + ub_app(b, "\n/// "); + else { + char t[2] = {*c, 0}; + ub_app(b, t); + } + } + ub_app(b, "\n"); + } + ub_app(b, cm ? "ClassMethod " : "Method "); + ub_app(b, mn); + ub_app(b, "("); + ub_app(b, formal); + ub_app(b, ")"); + if (ret[0]) { + ub_app(b, " As "); + ub_app(b, ret); + } + ub_app(b, "\n{\n"); + char impl[1024 * 32] = ""; + elem_content(ms, me, "Implementation", impl, sizeof(impl)); + ub_app(b, impl); + ub_app(b, "}\n\n"); +} + +static void emit_property(UdlBuf *b, const char *ps, const char *pe) { + char pn[MAX_NAME]; + extract_attr(ps, pe, "name", pn, sizeof(pn)); + if (!pn[0]) + return; + char pt[MAX_NAME] = ""; + elem_content(ps, pe, "Type", pt, sizeof(pt)); + PropParam params[MAX_PARAMS]; + int np = 0; + const char *pp = ps; + while (pp < pe && np < MAX_PARAMS) { + const char *po = find_s(pp, pe, ""); + if (!pg) + break; + extract_attr(po, pg, "name", params[np].param_name, MAX_NAME); + extract_attr(po, pg, "value", params[np].param_value, MAX_NAME); + if (!params[np].param_value[0]) { + char db[MAX_NAME]; + const char *a = elem_content(po, pe, "Parameter", db, MAX_NAME); + if (a && db[0]) + strncpy(params[np].param_value, db, MAX_NAME - 1); + } + if (params[np].param_name[0]) + np++; + pp = pg + 1; + } + ub_app(b, "Property "); + ub_app(b, pn); + if (pt[0]) { + ub_app(b, " As "); + ub_app(b, pt); + } + if (np > 0) { + ub_app(b, "("); + for (int i = 0; i < np; i++) { + if (i > 0) + ub_app(b, ", "); + ub_app(b, params[i].param_name); + if (params[i].param_value[0]) { + ub_app(b, " = "); + ub_app(b, params[i].param_value); + } + } + ub_app(b, ")"); + } + ub_app(b, ";\n\n"); +} + +static void emit_parameter(UdlBuf *b, const char *ps, const char *pe) { + char pn[MAX_NAME]; + extract_attr(ps, pe, "name", pn, sizeof(pn)); + if (!pn[0]) 
+ return; + char dv[MAX_NAME] = ""; + elem_content(ps, pe, "Default", dv, sizeof(dv)); + ub_app(b, "Parameter "); + ub_app(b, pn); + if (dv[0]) { + ub_app(b, " = \""); + ub_app(b, dv); + ub_app(b, "\""); + } + ub_app(b, ";\n\n"); +} + +static void emit_index(UdlBuf *b, const char *is_, const char *ie) { + char in_[MAX_NAME]; + extract_attr(is_, ie, "name", in_, sizeof(in_)); + if (!in_[0]) + return; + char props[MAX_NAME * 4] = ""; + elem_content(is_, ie, "Properties", props, sizeof(props)); + bool uniq = tag_is_one(is_, ie, "Unique"); + bool pkey = tag_is_one(is_, ie, "PrimaryKey"); + ub_app(b, "Index "); + ub_app(b, in_); + if (props[0]) { + ub_app(b, " On "); + ub_app(b, props); + } + if (uniq || pkey) { + ub_app(b, " [ "); + if (pkey) + ub_app(b, "PrimaryKey, "); + if (uniq) + ub_app(b, "Unique"); + ub_app(b, " ]"); + } + ub_app(b, ";\n\n"); +} + +static void emit_xdata(UdlBuf *b, const char *xs, const char *xe) { + char xn[MAX_NAME]; + extract_attr(xs, xe, "name", xn, sizeof(xn)); + if (!xn[0]) + return; + char data[1024 * 32] = ""; + elem_content(xs, xe, "Data", data, sizeof(data)); + ub_app(b, "XData "); + ub_app(b, xn); + ub_app(b, "\n{\n"); + ub_app(b, data); + ub_app(b, "\n}\n\n"); +} + +static char *transcode_class(CBMArena *arena, const char *cs, const char *ce) { + UdlBuf b; + ub_init(&b, arena); + if (!b.buf) + return NULL; + emit_header(&b, cs, ce); + const char *p = cs; + while (p < ce) { + p = skip_ws(p, ce); + if (p >= ce || *p != '<') { + if (p < ce) + p++; + continue; + } + if (sw(p, ce, "")) { + const char *gt = find_s(p, ce, ">"); + if (!gt) + break; + const char *me = find_s(gt + 1, ce, ""); + if (!me) { + p = gt + 1; + continue; + } + emit_method(&b, p, me + strlen("")); + p = me + strlen(""); + continue; + } + if (sw(p, ce, ""); + if (!gt) + break; + const char *pe = find_s(gt + 1, ce, ""); + if (!pe) { + p = gt + 1; + continue; + } + emit_property(&b, p, pe + strlen("")); + p = pe + strlen(""); + continue; + } + if (sw(p, ce, ""); + if 
(!gt) + break; + if (is_self_closing(p, gt)) { + p = gt + 1; + continue; + } + const char *pe = find_s(gt + 1, ce, ""); + if (!pe) { + p = gt + 1; + continue; + } + emit_parameter(&b, p, pe + strlen("")); + p = pe + strlen(""); + continue; + } + if (sw(p, ce, "")) { + const char *gt = find_s(p, ce, ">"); + if (!gt) + break; + const char *ie = find_s(gt + 1, ce, ""); + if (!ie) { + p = gt + 1; + continue; + } + emit_index(&b, p, ie + strlen("")); + p = ie + strlen(""); + continue; + } + if (sw(p, ce, "")) { + const char *gt = find_s(p, ce, ">"); + if (!gt) + break; + const char *xe = find_s(gt + 1, ce, ""); + if (!xe) { + p = gt + 1; + continue; + } + emit_xdata(&b, p, xe + strlen("")); + p = xe + strlen(""); + continue; + } + p = skip_tag(p, ce); + } + ub_app(&b, "}\n"); + return b.buf; +} + +char **cbm_iris_export_to_udl(CBMArena *arena, const char *xml, int xml_len, int *class_count) { + if (class_count) + *class_count = 0; + if (!arena || !xml || xml_len <= 0) + return NULL; + const char *end = xml + xml_len; + if (!find_s(xml, end, EXPORT_MARKER)) + return NULL; + char *results[MAX_CLASSES]; + int count = 0; + const char *p = xml; + while (p < end && count < MAX_CLASSES) { + const char *co = find_s(p, end, ""); + if (!gt) + break; + const char *cc = find_s(gt + 1, end, ""); + if (!cc) + break; + char *udl = transcode_class(arena, co, cc); + if (udl && udl[0]) + results[count++] = udl; + p = cc + strlen(""); + } + if (!count) + return NULL; + char **arr = (char **)cbm_arena_alloc(arena, (size_t)(count + 1) * sizeof(char *)); + if (!arr) + return NULL; + for (int i = 0; i < count; i++) + arr[i] = results[i]; + arr[count] = NULL; + if (class_count) + *class_count = count; + return arr; +} diff --git a/internal/cbm/iris_export_xml.h b/internal/cbm/iris_export_xml.h new file mode 100644 index 00000000..30c289ff --- /dev/null +++ b/internal/cbm/iris_export_xml.h @@ -0,0 +1,19 @@ +#pragma once +#include "arena.h" + +/* + * IRIS Studio Export XML transcoder. 
+ * + * Converts XML files to equivalent UDL text so + * they can be fed to the existing ObjectScript UDL extraction pipeline. + * The XML-to-UDL mapping is 1:1; no new extraction logic is needed. + * + * One Export file may contain multiple blocks. Each produces a + * separate UDL string. The caller iterates the returned array and calls + * cbm_extract_file(..., CBM_LANG_OBJECTSCRIPT_UDL, ...) for each entry. + * + * Returns arena-allocated array of NUL-terminated UDL strings, or NULL + * if the file is not an Export file or parsing fails gracefully. + * *class_count is set to the number of classes found (0 on failure). + */ +char **cbm_iris_export_to_udl(CBMArena *arena, const char *xml, int xml_len, int *class_count); diff --git a/internal/cbm/lang_specs.c b/internal/cbm/lang_specs.c index 68d2afad..0af2b3e9 100644 --- a/internal/cbm/lang_specs.c +++ b/internal/cbm/lang_specs.c @@ -164,6 +164,8 @@ extern const TSLanguage *tree_sitter_apex(void); extern const TSLanguage *tree_sitter_soql(void); extern const TSLanguage *tree_sitter_sosl(void); extern const TSLanguage *tree_sitter_pine(void); +extern const TSLanguage *tree_sitter_objectscript_udl(void); +extern const TSLanguage *tree_sitter_objectscript_routine(void); // -- Empty sentinel -- static const char *empty_types[] = {NULL}; @@ -1562,6 +1564,25 @@ static const char *pine_var_types[] = {"variable_definition_statement", static const char *pine_branch_types[] = {"if_statement", "switch_statement", "for_statement", "for_in_statement", "while_statement", NULL}; static const char *pine_assign_types[] = {"reassignment_statement", NULL}; + +// InterSystems ObjectScript. Node names verified against +// intersystems/tree-sitter-objectscript grammar. 
+static const char *objectscript_udl_func_types[] = {"method", "classmethod", "query", NULL}; +static const char *objectscript_udl_class_types[] = {"class_definition", NULL}; +static const char *objectscript_udl_field_types[] = { + "property", "parameter", "index", "trigger", "xdata", "storage", "foreignkey", NULL}; +static const char *objectscript_udl_call_types[] = {"class_method_call", "instance_method_call", + "relative_dot_method", "macro", NULL}; +static const char *objectscript_udl_module_types[] = {"source_file", NULL}; +/* Branching nodes for cyclomatic complexity (verified against grammar node-types) */ +static const char *objectscript_udl_branch_types[] = { + "command_if", "command_for", "command_while", "elseif_block", "catch_block", NULL}; + +static const char *objectscript_routine_func_types[] = {"tag", NULL}; +static const char *objectscript_routine_call_types[] = {"extrinsic_function", "routine_tag_call", + NULL}; +static const char *objectscript_routine_module_types[] = {"source_file", NULL}; + // ==================== SPEC TABLE ==================== static const CBMLangSpec lang_specs[CBM_LANG_COUNT] = { @@ -2537,6 +2558,29 @@ static const CBMLangSpec lang_specs[CBM_LANG_COUNT] = { pine_branch_types, pine_var_types, pine_assign_types, empty_types, NULL, empty_types, NULL, NULL, tree_sitter_pine, NULL}, + // CBM_LANG_OBJECTSCRIPT_UDL — InterSystems ObjectScript class (.cls) UDL. + // intersystems/tree-sitter-objectscript. + [CBM_LANG_OBJECTSCRIPT_UDL] = {CBM_LANG_OBJECTSCRIPT_UDL, objectscript_udl_func_types, + objectscript_udl_class_types, objectscript_udl_field_types, + objectscript_udl_module_types, objectscript_udl_call_types, + empty_types, empty_types, objectscript_udl_branch_types, + empty_types, empty_types, empty_types, NULL, empty_types, NULL, + NULL, tree_sitter_objectscript_udl, NULL}, + + // CBM_LANG_OBJECTSCRIPT_ROUTINE — InterSystems ObjectScript routine (.mac/.int/.rtn/.inc). 
+ [CBM_LANG_OBJECTSCRIPT_ROUTINE] = {CBM_LANG_OBJECTSCRIPT_ROUTINE, + objectscript_routine_func_types, empty_types, empty_types, + objectscript_routine_module_types, + objectscript_routine_call_types, empty_types, empty_types, + empty_types, empty_types, empty_types, empty_types, NULL, + empty_types, NULL, NULL, tree_sitter_objectscript_routine, + NULL}, + + // CBM_LANG_OBJECTSCRIPT_EXPORT — Studio Export XML. No grammar row: the + // pipeline transcodes Export XML to UDL (iris_export_xml.c) and re-extracts + // each class as CBM_LANG_OBJECTSCRIPT_UDL, so this language never reaches + // cbm_lang_spec()/cbm_ts_language() directly. Left as a zero spec. + }; _Static_assert(sizeof(lang_specs) / sizeof(lang_specs[0]) == CBM_LANG_COUNT, diff --git a/internal/cbm/macro_table.c b/internal/cbm/macro_table.c new file mode 100644 index 00000000..cca63679 --- /dev/null +++ b/internal/cbm/macro_table.c @@ -0,0 +1,243 @@ +#include "macro_table.h" +#include "arena.h" +#include +#include +#include + +static const struct { + const char *name; + int param_count; + const char *callee; +} SYSTEM_MACROS[] = {{"OK", 0, NULL}, + {"ISERR", 1, "%SYSTEM.Status.IsError"}, + {"ISOK", 1, "%SYSTEM.Status.IsOK"}, + {"GETERRORTEXT", 1, "%SYSTEM.Status.GetErrorText"}, + {"ADDSC", 2, "%SYSTEM.Status.AppendStatus"}, + {"ThrowStatus", 1, "%SYSTEM.Status.ThrowStatus"}, + {"ThrowOnError", 1, "%SYSTEM.Status.ThrowStatus"}, + {"ERROR", 2, "%SYSTEM.Status.Error"}, + {"NULLOREF", 0, NULL}, + {"LISTBUILD", -1, NULL}, + {"LISTGET", 2, NULL}, + {"LISTNEXT", 3, NULL}, + {"LISTLENGTH", 1, NULL}, + {"SORTBEGIN", 1, NULL}, + {"SORTEND", 0, NULL}, + {"AUDITSTART", 3, "%SYSTEM.Audit.Event"}, + {"logoutput", 1, NULL}, + {"objExists", 1, NULL}, + {"traceStatus", 1, NULL}, + {NULL, 0, NULL}}; + +void cbm_macro_table_init_system(CBMMacroTable *t) { + t->count = 0; + for (int i = 0; SYSTEM_MACROS[i].name; i++) { + if (t->count >= CBM_MACRO_TABLE_CAP) + break; + CBMMacroEntry *e = &t->entries[t->count++]; + e->name = 
SYSTEM_MACROS[i].name; + e->param_count = SYSTEM_MACROS[i].param_count; + e->expansion = NULL; + e->resolved_callee = SYSTEM_MACROS[i].callee; + for (int p = 0; p < CBM_MACRO_MAX_PARAMS; p++) + e->param_names[p] = NULL; + } +} + +void cbm_macro_table_add(CBMMacroTable *t, CBMArena *arena, const char *name, int param_count, + const char **param_names, const char *expansion, + const char *resolved_callee) { + if (t->count >= CBM_MACRO_TABLE_CAP || !name) + return; + for (int i = 0; i < t->count; i++) { + if (strcasecmp(t->entries[i].name, name) == 0) + return; + } + CBMMacroEntry *e = &t->entries[t->count++]; + e->name = cbm_arena_strdup(arena, name); + e->param_count = param_count; + e->expansion = expansion ? cbm_arena_strdup(arena, expansion) : NULL; + e->resolved_callee = resolved_callee ? cbm_arena_strdup(arena, resolved_callee) : NULL; + for (int p = 0; p < CBM_MACRO_MAX_PARAMS; p++) { + e->param_names[p] = + (param_names && p < param_count) ? cbm_arena_strdup(arena, param_names[p]) : NULL; + } +} + +const CBMMacroEntry *cbm_macro_table_find(const CBMMacroTable *t, const char *name) { + if (!t || !name) + return NULL; + for (int i = 0; i < t->count; i++) { + if (strcasecmp(t->entries[i].name, name) == 0) + return &t->entries[i]; + } + return NULL; +} + +void cbm_parse_inc_file(CBMMacroTable *t, CBMArena *arena, const char *content) { + if (!content) + return; + const char *line = content; + while (*line) { + const char *end = strchr(line, '\n'); + if (!end) + end = line + strlen(line); + + const char *p = line; + while (*p == ' ' || *p == '\t') + p++; + + if (strncmp(p, "#define", 7) == 0 && (p[7] == ' ' || p[7] == '\t')) { + p += 8; + while (*p == ' ' || *p == '\t') + p++; + + const char *name_start = p; + while (*p && *p != '(' && *p != ' ' && *p != '\t' && p < end) + p++; + if (p == name_start) + goto next_line; + + char name[256]; + int nlen = (int)(p - name_start); + if (nlen >= (int)sizeof(name)) + goto next_line; + memcpy(name, name_start, nlen); + 
name[nlen] = '\0'; + + int param_count = -1; + char param_names_buf[CBM_MACRO_MAX_PARAMS][64]; + const char *param_name_ptrs[CBM_MACRO_MAX_PARAMS] = {NULL}; + + if (*p == '(') { + param_count = 0; + p++; + while (*p && *p != ')' && p < end) { + while (*p == ' ' || *p == '\t') + p++; + if (*p == ')') + break; + const char *pn_start = p; + while (*p && *p != ',' && *p != ')' && p < end) + p++; + int plen = (int)(p - pn_start); + while (plen > 0 && (pn_start[plen - 1] == ' ' || pn_start[plen - 1] == '\t')) + plen--; + if (plen > 0 && param_count < CBM_MACRO_MAX_PARAMS) { + memcpy(param_names_buf[param_count], pn_start, plen < 63 ? plen : 63); + param_names_buf[param_count][plen < 63 ? plen : 63] = '\0'; + param_name_ptrs[param_count] = param_names_buf[param_count]; + param_count++; + } + if (*p == ',') + p++; + } + if (*p == ')') + p++; + } + + while (*p == ' ' || *p == '\t') + p++; + int explen = (int)(end - p); + while (explen > 0 && + (p[explen - 1] == '\r' || p[explen - 1] == ' ' || p[explen - 1] == '\t')) + explen--; + char *expansion = NULL; + if (explen > 0) { + expansion = cbm_arena_strndup(arena, p, explen); + } + + cbm_macro_table_add(t, arena, name, param_count, + param_count > 0 ? param_name_ptrs : NULL, expansion, NULL); + } + + next_line: + if (!*end) + break; + line = end + 1; + } +} + +char *cbm_macro_expand(CBMArena *arena, const CBMMacroEntry *entry, const char **args, + int arg_count) { + if (!entry || !entry->expansion) + return NULL; + const char *tmpl = entry->expansion; + char buf[1024]; + int out = 0; + const char *p = tmpl; + while (*p && out < (int)sizeof(buf) - 1) { + if (*p == '%') { + bool matched = false; + for (int i = 0; i < entry->param_count && i < CBM_MACRO_MAX_PARAMS; i++) { + if (!entry->param_names[i]) + continue; + int pnlen = (int)strlen(entry->param_names[i]); + if (strncasecmp(p, entry->param_names[i], pnlen) == 0) { + const char *arg = (args && i < arg_count) ? 
args[i] : ""; + int alen = (int)strlen(arg); + if (out + alen < (int)sizeof(buf) - 1) { + memcpy(buf + out, arg, alen); + out += alen; + } + p += pnlen; + matched = true; + break; + } + } + if (!matched) + buf[out++] = *p++; + } else { + buf[out++] = *p++; + } + } + buf[out] = '\0'; + return cbm_arena_strdup(arena, buf); +} + +char *cbm_macro_extract_callee(CBMArena *arena, const char *expansion) { + if (!expansion) + return NULL; + + const char *p = strstr(expansion, "##class("); + if (p) { + p += 8; + const char *cls_end = strchr(p, ')'); + if (!cls_end) + return NULL; + int clen = (int)(cls_end - p); + const char *dot = cls_end + 1; + if (*dot != '.') + return NULL; + dot++; + const char *method_start = dot; + const char *method_end = method_start; + while (*method_end && *method_end != '(' && *method_end != ' ') + method_end++; + int mlen = (int)(method_end - method_start); + if (clen <= 0 || mlen <= 0) + return NULL; + return cbm_arena_sprintf(arena, "%.*s.%.*s", clen, p, mlen, method_start); + } + + p = strstr(expansion, "$$"); + if (p && p[2] != '$') { + p += 2; + const char *tag_end = p; + while (*tag_end && *tag_end != '^' && *tag_end != '(' && *tag_end != ' ') + tag_end++; + if (*tag_end == '^') { + const char *rtn = tag_end + 1; + const char *rtn_end = rtn; + while (*rtn_end && *rtn_end != '(' && *rtn_end != ' ') + rtn_end++; + int tlen = (int)(tag_end - p); + int rlen = (int)(rtn_end - rtn); + if (tlen > 0 && rlen > 0) { + return cbm_arena_sprintf(arena, "%.*s^%.*s", tlen, p, rlen, rtn); + } + } + } + + return NULL; +} diff --git a/internal/cbm/macro_table.h b/internal/cbm/macro_table.h new file mode 100644 index 00000000..661475cb --- /dev/null +++ b/internal/cbm/macro_table.h @@ -0,0 +1,43 @@ +#pragma once +#include +#include "arena.h" + +#define CBM_MACRO_MAX_PARAMS 4 +#define CBM_MACRO_TABLE_CAP 4096 + +typedef struct { + const char *name; + int param_count; + const char *param_names[CBM_MACRO_MAX_PARAMS]; + const char *expansion; + const char 
*resolved_callee; +} CBMMacroEntry; + +typedef struct CBMMacroTable { + CBMMacroEntry entries[CBM_MACRO_TABLE_CAP]; + int count; +} CBMMacroTable; + +// Add an entry. Silently drops on overflow. +void cbm_macro_table_add(CBMMacroTable *t, CBMArena *arena, const char *name, int param_count, + const char **param_names, const char *expansion, + const char *resolved_callee); + +// Look up by name. Returns NULL if not found. +const CBMMacroEntry *cbm_macro_table_find(const CBMMacroTable *t, const char *name); + +// Parse a single .inc file content into the table (arena-allocated strings). +void cbm_parse_inc_file(CBMMacroTable *t, CBMArena *arena, const char *content); + +// Expand a macro call: substitute args into expansion text. +// Returns arena-allocated expanded text, or NULL if no expansion. +char *cbm_macro_expand(CBMArena *arena, const CBMMacroEntry *entry, const char **args, + int arg_count); + +// Extract a callee name from expanded text (looks for ##class(X).Method or $$Label^Routine). +// Returns arena-allocated "X.Method" or "Label^Routine", or NULL. +char *cbm_macro_extract_callee(CBMArena *arena, const char *expansion); + +// Allocate and populate a new table with the hardcoded system macros. +// Caller owns the table (stack or heap). 
+void cbm_macro_table_init_system(CBMMacroTable *t); diff --git a/src/discover/discover.c b/src/discover/discover.c index 314c00c5..a6a8bc49 100644 --- a/src/discover/discover.c +++ b/src/discover/discover.c @@ -337,6 +337,28 @@ static CBMLanguage detect_file_language(const char *entry_name, const char *abs_ if (dot && strcmp(dot, ".m") == 0) { lang = cbm_disambiguate_m(abs_path); } + /* Special: .cls is shared by ObjectScript UDL and Apex */ + if (dot && strcmp(dot, ".cls") == 0) { + lang = cbm_disambiguate_cls(abs_path); + } + /* Special: .inc is shared by BitBake and ObjectScript include files */ + if (dot && strcmp(dot, ".inc") == 0) { + lang = cbm_disambiguate_inc(abs_path); + } + /* Special: ObjectScript Studio Export XML () is + * detected by content; otherwise .xml stays XML. */ + if (lang == CBM_LANG_XML) { + FILE *xf = fopen(abs_path, "r"); + if (xf) { + char xbuf[CBM_SZ_256]; + size_t xn = fread(xbuf, SKIP_ONE, sizeof(xbuf) - SKIP_ONE, xf); + (void)fclose(xf); + xbuf[xn] = '\0'; + if (strstr(xbuf, "", + * otherwise CBM_LANG_APEX. On read failure, defaults to CBM_LANG_APEX. */ +CBMLanguage cbm_disambiguate_cls(const char *path); + +/* Disambiguate .inc files by reading first 4KB of content. + * Returns CBM_LANG_OBJECTSCRIPT_ROUTINE if it looks like an ObjectScript + * include (a "ROUTINE " header), otherwise CBM_LANG_BITBAKE. + * On read failure, defaults to CBM_LANG_BITBAKE. */ +CBMLanguage cbm_disambiguate_inc(const char *path); + /* ── Gitignore pattern matching ──────────────────────────────────── */ typedef struct cbm_gitignore cbm_gitignore_t; diff --git a/src/discover/language.c b/src/discover/language.c index a0254306..b4b80d1e 100644 --- a/src/discover/language.c +++ b/src/discover/language.c @@ -264,9 +264,17 @@ static const ext_entry_t EXT_TABLE[] = { {"WORKSPACE", CBM_LANG_STARLARK}, {"WORKSPACE.bazel", CBM_LANG_STARLARK}, - /* BitBake include fragments — `require/include foo.inc` target files. 
*/ + /* BitBake include fragments — `require/include foo.inc` target files. + * NOTE: .inc is also used by ObjectScript include (macro) files; the + * ambiguity is resolved by content in cbm_disambiguate_inc(). */ {".inc", CBM_LANG_BITBAKE}, + /* InterSystems ObjectScript routines (.mac/.int/.rtn unambiguous; .cls is + * shared with Apex and resolved by content in cbm_disambiguate_cls()). */ + {".mac", CBM_LANG_OBJECTSCRIPT_ROUTINE}, + {".int", CBM_LANG_OBJECTSCRIPT_ROUTINE}, + {".rtn", CBM_LANG_OBJECTSCRIPT_ROUTINE}, + /* Vue */ {".vue", CBM_LANG_VUE}, @@ -834,6 +842,9 @@ static const char *LANG_NAMES[CBM_LANG_COUNT] = { [CBM_LANG_APEX] = "Apex", [CBM_LANG_SOQL] = "SOQL", [CBM_LANG_SOSL] = "SOSL", + [CBM_LANG_OBJECTSCRIPT_UDL] = "ObjectScript UDL", + [CBM_LANG_OBJECTSCRIPT_ROUTINE] = "ObjectScript Routine", + [CBM_LANG_OBJECTSCRIPT_EXPORT] = "ObjectScript Export XML", }; @@ -1025,3 +1036,84 @@ CBMLanguage cbm_disambiguate_m(const char *path) { return CBM_LANG_MATLAB; } + +/* Disambiguate .cls files: shared by InterSystems ObjectScript UDL and + * Salesforce Apex. ObjectScript class files begin with a line of the form + * "Class ...". Defaults to Apex on any doubt. */ +CBMLanguage cbm_disambiguate_cls(const char *path) { + if (!path) { + return CBM_LANG_APEX; + } + + FILE *f = fopen(path, "r"); + if (!f) { + return CBM_LANG_APEX; + } + + char buf[CBM_SZ_4K + SKIP_ONE]; + size_t n = fread(buf, SKIP_ONE, CBM_SZ_4K, f); + buf[n] = '\0'; + (void)fclose(f); + + const char *line = buf; + while (*line) { + if (strncmp(line, "Class ", SLEN("Class ")) == 0 && + isupper((unsigned char)line[SLEN("Class ")])) { + return CBM_LANG_OBJECTSCRIPT_UDL; + } + const char *nl = strchr(line, '\n'); + if (!nl) { + break; + } + line = nl + SKIP_ONE; + } + return CBM_LANG_APEX; +} + +/* Disambiguate .inc files: shared by BitBake include fragments and + * InterSystems ObjectScript include (macro) files. ObjectScript .inc files are + * predominantly macro definitions ("#define NAME ..." 
/ "#def1arg NAME ...");
+ * some also carry a "ROUTINE " header. The macro-preprocessor directives
+ * are the strongest signal because that is the primary content of an .inc file,
+ * whereas BitBake uses '#' only for "# comment" lines (by convention '#' + space).
+ * We therefore match lines that START with "#define", "#def1arg" or "#;" (leading
+ * whitespace is not skipped). Only the first CBM_SZ_4K bytes of the file are read.
+ * Returns CBM_LANG_BITBAKE for a NULL path, an unopenable file, or no match. */
+CBMLanguage cbm_disambiguate_inc(const char *path) {
+    if (!path) {
+        return CBM_LANG_BITBAKE;
+    }
+
+    FILE *f = fopen(path, "r");
+    if (!f) {
+        return CBM_LANG_BITBAKE;
+    }
+
+    char buf[CBM_SZ_4K + SKIP_ONE];
+    size_t n = fread(buf, SKIP_ONE, CBM_SZ_4K, f);
+    buf[n] = '\0';
+    (void)fclose(f);
+
+    const char *line = buf;
+    while (*line) {
+        /* ObjectScript include header: "ROUTINE " followed by an uppercase letter. */
+        if (strncmp(line, "ROUTINE ", SLEN("ROUTINE ")) == 0 &&
+            isupper((unsigned char)line[SLEN("ROUTINE ")])) {
+            return CBM_LANG_OBJECTSCRIPT_ROUTINE;
+        }
+        /* ObjectScript macro directives — the primary content of .inc files.
+         * "#define"/"#def1arg" (macro defs) and "#;" (line comment). Prefix match only.
+         * NOTE(review): a '#' comment written without the space would also match. */
+        if (strncmp(line, "#define", SLEN("#define")) == 0 ||
+            strncmp(line, "#def1arg", SLEN("#def1arg")) == 0 ||
+            strncmp(line, "#;", SLEN("#;")) == 0) {
+            return CBM_LANG_OBJECTSCRIPT_ROUTINE;
+        }
+        const char *nl = strchr(line, '\n');
+        if (!nl) {
+            break;
+        }
+        line = nl + SKIP_ONE;
+    }
+    return CBM_LANG_BITBAKE;
+}
diff --git a/src/pipeline/pass_calls.c b/src/pipeline/pass_calls.c
index 15d691d3..ad9b7601 100644
--- a/src/pipeline/pass_calls.c
+++ b/src/pipeline/pass_calls.c
@@ -375,6 +375,85 @@ static int resolve_single_call(cbm_pipeline_ctx_t *ctx, CBMCall *call,
     return SKIP_ONE;
 }
 
+/* ObjectScript: build a method-QN -> return-type table from the Method nodes
+ * already in the graph buffer (definitions pass ran first). 
Scalar return types + * (%String, %Integer, ...) are skipped since they cannot host method dispatch. + * Returns NULL when no usable entries exist. Caller owns the heap table. */ +static CBMReturnTypeTable *build_return_type_table(const cbm_gbuf_t *gbuf) { + if (!gbuf) { + return NULL; + } + const cbm_gbuf_node_t **method_nodes = NULL; + int method_count = 0; + if (cbm_gbuf_find_by_label(gbuf, "Method", &method_nodes, &method_count) != 0 || + method_count <= 0 || !method_nodes) { + return NULL; + } + + CBMReturnTypeTable *rtt = (CBMReturnTypeTable *)calloc(1, sizeof(CBMReturnTypeTable)); + if (!rtt) { + free((void *)method_nodes); + return NULL; + } + + static const char *scalar_types[] = {"%String", "%Integer", "%Float", "%Boolean", + "%Status", "%Numeric", "%Date", "%Time", + "%TimeStamp", "%Binary", NULL}; + + for (int i = 0; i < method_count && rtt->count < CBM_RETURN_TYPE_TABLE_CAP; i++) { + const cbm_gbuf_node_t *n = method_nodes[i]; + if (!n->qualified_name || !n->properties_json) { + continue; + } + + const char *p = strstr(n->properties_json, "\"return_type\":"); + if (!p) { + continue; + } + p += 14; /* strlen("\"return_type\":") */ + while (*p == ' ') { + p++; + } + if (*p != '"') { + continue; + } + p++; + const char *end = strchr(p, '"'); + if (!end) { + continue; + } + int rtlen = (int)(end - p); + if (rtlen <= 0 || rtlen > 255) { + continue; + } + + char rt_buf[256]; + memcpy(rt_buf, p, (size_t)rtlen); + rt_buf[rtlen] = '\0'; + + bool is_scalar = false; + for (int si = 0; scalar_types[si]; si++) { + if (strcmp(rt_buf, scalar_types[si]) == 0) { + is_scalar = true; + break; + } + } + if (is_scalar) { + continue; + } + + rtt->entries[rtt->count].method_qn = n->qualified_name; + rtt->entries[rtt->count].return_type = strdup(rt_buf); + rtt->count++; + } + free((void *)method_nodes); + if (rtt->count == 0) { + free(rtt); + return NULL; + } + return rtt; +} + static CBMFileResult *calls_get_or_extract(cbm_pipeline_ctx_t *ctx, int idx, const cbm_file_info_t 
*fi, bool *owned) { *owned = false; @@ -386,8 +465,9 @@ static CBMFileResult *calls_get_or_extract(cbm_pipeline_ctx_t *ctx, int idx, if (!src) { return NULL; } - CBMFileResult *r = cbm_extract_file(src, slen, fi->language, ctx->project_name, fi->rel_path, - CBM_EXTRACT_BUDGET, NULL, NULL); + CBMFileResult *r = cbm_extract_file_ex(src, slen, fi->language, ctx->project_name, fi->rel_path, + CBM_EXTRACT_BUDGET, NULL, NULL, ctx->macro_table, + ctx->return_type_table); free(src); if (r) { *owned = true; @@ -398,6 +478,16 @@ static CBMFileResult *calls_get_or_extract(cbm_pipeline_ctx_t *ctx, int idx, int cbm_pipeline_pass_calls(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count) { cbm_log_info("pass.start", "pass", "calls", "files", itoa_log(file_count)); + /* ObjectScript: build the method-return-type table from the definitions + * already in the graph buffer so `Set x = obj.Method()` can resolve x's + * class for subsequent x.Method() dispatch. NULL if no Method nodes. */ + if (!ctx->return_type_table) { + CBMReturnTypeTable *rtt = build_return_type_table(ctx->gbuf); + if (rtt) { + ctx->return_type_table = rtt; + } + } + int total_calls = 0; int resolved = 0; int unresolved = 0; diff --git a/src/pipeline/pass_definitions.c b/src/pipeline/pass_definitions.c index 676f1b16..2aedbda4 100644 --- a/src/pipeline/pass_definitions.c +++ b/src/pipeline/pass_definitions.c @@ -23,6 +23,8 @@ enum { PD_JSON_FIELD_OVERHEAD = 6 }; #include "foundation/log.h" #include "foundation/compat.h" #include "cbm.h" +#include "arena.h" +#include "iris_export_xml.h" #include "simhash/minhash.h" #include "semantic/ast_profile.h" @@ -489,11 +491,42 @@ int cbm_pipeline_pass_definitions(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t continue; } + /* ObjectScript Studio Export XML: transcode each to UDL and + * extract it as CBM_LANG_OBJECTSCRIPT_UDL. The XML→UDL mapping is 1:1, + * so the same UDL extractor handles the result. 
These files are not + * cached (their defs/edges are emitted directly here). */ + if (lang == CBM_LANG_OBJECTSCRIPT_EXPORT) { + CBMArena export_arena; + cbm_arena_init(&export_arena); + int class_count = 0; + char **udl_strings = + cbm_iris_export_to_udl(&export_arena, source, source_len, &class_count); + free(source); + for (int ci = 0; ci < class_count; ci++) { + CBMFileResult *xr = cbm_extract_file_ex( + udl_strings[ci], (int)strlen(udl_strings[ci]), CBM_LANG_OBJECTSCRIPT_UDL, + ctx->project_name, rel, CBM_EXTRACT_BUDGET, NULL, NULL, ctx->macro_table, NULL); + if (!xr) { + continue; + } + for (int d = 0; d < xr->defs.count; d++) { + process_def(ctx, &xr->defs.items[d], rel); + total_defs++; + } + total_calls += xr->calls.count; + total_imports += create_import_edges_for_file(ctx, xr, rel, NULL); + create_channel_edges_for_file(ctx, xr, rel); + create_env_configures_for_file(ctx, xr, rel); + cbm_free_result(xr); + } + cbm_arena_destroy(&export_arena); + continue; + } + /* Extract */ - CBMFileResult *result = - cbm_extract_file(source, source_len, lang, ctx->project_name, rel, CBM_EXTRACT_BUDGET, - NULL, NULL /* no extra defines or include paths */ - ); + CBMFileResult *result = cbm_extract_file_ex( + source, source_len, lang, ctx->project_name, rel, CBM_EXTRACT_BUDGET, NULL, + NULL /* no extra defines or include paths */, ctx->macro_table, NULL); free(source); if (!result) { diff --git a/src/pipeline/pass_ensemble_routing.c b/src/pipeline/pass_ensemble_routing.c new file mode 100644 index 00000000..a3ded5a5 --- /dev/null +++ b/src/pipeline/pass_ensemble_routing.c @@ -0,0 +1,645 @@ +#include "pipeline/pass_ensemble_routing.h" +#include "pipeline/pipeline_internal.h" +#include "graph_buffer/graph_buffer.h" +#include "foundation/log.h" +#include "foundation/compat.h" +#include "foundation/compat_fs.h" +#include "foundation/constants.h" +#include "foundation/str_util.h" + +#include +#include +#include +#include +#include + +#define CONF_LITERAL 0.95 +#define 
CONF_PROP 0.85 + +#define MAX_ITEMS 256 +#define MAX_SETTINGS 8 + +static const char *TOPOLOGY_SETTINGS[] = {"TargetConfigName", "PatientHost", "ConformanceOperation", + NULL}; + +static const char *ENTRY_POINTS[] = {"OnProcessInput", "OnMessage", "OnRequest", "OnTask", NULL}; + +typedef struct { + char setting_name[CBM_SZ_256]; + char value[CBM_SZ_256]; +} ens_setting_t; + +typedef struct { + char item_name[CBM_SZ_256]; + char class_name[CBM_SZ_256]; + bool enabled; + ens_setting_t settings[MAX_SETTINGS]; + int n_settings; +} ens_item_t; + +typedef struct { + char production_class[CBM_SZ_256]; + char file_path[CBM_SZ_512]; + ens_item_t items[MAX_ITEMS]; + int n_items; +} ens_prod_def_t; + +static void extract_xml_attr(const char *xml, int offset, const char *attr, char *out, int outsz) { + char needle[CBM_SZ_64]; + snprintf(needle, sizeof(needle), "%s=\"", attr); + const char *p = strstr(xml + offset, needle); + out[0] = '\0'; + if (!p) + return; + p += strlen(needle); + const char *e = strchr(p, '"'); + if (!e) + return; + int len = (int)(e - p); + if (len >= outsz) + len = outsz - 1; + memcpy(out, p, (size_t)len); + out[len] = '\0'; +} + +static bool is_topology_setting(const char *name) { + for (int i = 0; TOPOLOGY_SETTINGS[i]; i++) + if (strcmp(name, TOPOLOGY_SETTINGS[i]) == 0) + return true; + return false; +} + +/* True if `qn` equals `seg` or ends with "." — a segment-anchored match. + * Avoids the false positives of a bare strstr (e.g. "MyService" matching + * "NotMyService", or "A.B.Meth" matching the unrelated "X.B.Meth"). 
*/ +static bool qn_ends_with_segment(const char *qn, const char *seg) { + if (!qn || !seg) + return false; + size_t lq = strlen(qn), ls = strlen(seg); + if (ls == 0 || ls > lq) + return false; + if (strcmp(qn + (lq - ls), seg) != 0) + return false; + return lq == ls || qn[lq - ls - 1] == '.'; +} + +static ens_prod_def_t *parse_production_xml(const char *xml, const char *class_qn, + const char *file_path) { + ens_prod_def_t *def = calloc(1, sizeof(ens_prod_def_t)); + if (!def) + return NULL; + snprintf(def->production_class, CBM_SZ_256, "%s", class_qn); + snprintf(def->file_path, sizeof(def->file_path), "%s", file_path ? file_path : ""); + + const char *p = xml; + while (*p && def->n_items < MAX_ITEMS) { + const char *item_start = strstr(p, "items[def->n_items]; + memset(item, 0, sizeof(*item)); + item->enabled = true; + + int off = (int)(item_start - xml); + extract_xml_attr(xml, off, "Name", item->item_name, CBM_SZ_256); + extract_xml_attr(xml, off, "ClassName", item->class_name, CBM_SZ_256); + char en[16]; + extract_xml_attr(xml, off, "Enabled", en, sizeof(en)); + if (en[0] && strcasecmp(en, "false") == 0) + item->enabled = false; + + if (!item->item_name[0] || !item->class_name[0]) { + p = item_start + 6; + continue; + } + + const char *item_end = strstr(item_start, ""); + if (!item_end) + item_end = item_start + strlen(item_start); + + const char *sp = item_start; + while (sp < item_end && item->n_settings < MAX_SETTINGS) { + const char *set = strstr(sp, "= item_end) + break; + int soff = (int)(set - xml); + char tgt[64], sname[CBM_SZ_256]; + extract_xml_attr(xml, soff, "Target", tgt, sizeof(tgt)); + extract_xml_attr(xml, soff, "Name", sname, CBM_SZ_256); + if (strcmp(tgt, "Host") == 0 && is_topology_setting(sname)) { + const char *vs = strchr(set + 9, '>'); + if (vs) { + vs++; + const char *ve = strstr(vs, ""); + if (ve && ve < item_end) { + int vlen = (int)(ve - vs); + if (vlen > 0 && vlen < CBM_SZ_256) { + ens_setting_t *s = 
&item->settings[item->n_settings++];
+                            snprintf(s->setting_name, CBM_SZ_256, "%s", sname);
+                            memcpy(s->value, vs, (size_t)vlen);
+                            s->value[vlen] = '\0';
+                        }
+                    }
+                }
+            }
+            sp = set + 9;
+        }
+        def->n_items++;
+        p = item_end + 7;
+    }
+    return def;
+}
+/* Read full_path into a malloc'd NUL-terminated string (caller frees); NULL if unreadable, empty or > 8 MiB. */
+static char *read_file(const char *full_path) {
+    FILE *f = fopen(full_path, "rb");
+    if (!f)
+        return NULL;
+    fseek(f, 0, SEEK_END);
+    long sz = ftell(f); /* -1 on failure, rejected by the sz <= 0 test below */
+    fseek(f, 0, SEEK_SET);
+    if (sz <= 0 || sz > 8 * 1024 * 1024) {
+        fclose(f);
+        return NULL;
+    }
+    char *buf = malloc((size_t)sz + 1);
+    if (!buf) {
+        fclose(f);
+        return NULL;
+    }
+    size_t got = fread(buf, 1, (size_t)sz, f); /* may be short; got <= sz always holds */
+    buf[got] = '\0'; /* terminate at the bytes actually read, never past initialised data */
+    fclose(f);
+    return buf;
+}
+/* Copy the value of "key":"..." from json into buf (truncated to sz-1, no escape handling); NULL if absent. */
+static const char *jstr(const char *json, const char *key, char *buf, int sz) {
+    if (!json || !key)
+        return NULL;
+    char needle[CBM_SZ_64];
+    snprintf(needle, sizeof(needle), "\"%s\":\"", key); /* requires no space after the colon */
+    const char *s = strstr(json, needle);
+    if (!s)
+        return NULL;
+    s += strlen(needle);
+    const char *e = strchr(s, '"');
+    if (!e)
+        return NULL;
+    int len = (int)(e - s);
+    if (len >= sz)
+        len = sz - 1;
+    memcpy(buf, s, (size_t)len);
+    buf[len] = '\0';
+    return buf;
+}
+/* Linear search of def->items for an exact item_name match; NULL when there is none. */
+static const ens_item_t *find_item(const ens_prod_def_t *def, const char *name) {
+    for (int i = 0; i < def->n_items; i++)
+        if (strcmp(def->items[i].item_name, name) == 0)
+            return &def->items[i];
+    return NULL;
+}
+/* Id of the first node whose QN ends in "<class_name>.<entry>", trying ENTRY_POINTS in order; 0 if none. */
+static int64_t find_entry_point(cbm_pipeline_ctx_t *ctx, const char *class_name) {
+    for (int ei = 0; ENTRY_POINTS[ei]; ei++) {
+        char suffix[CBM_SZ_512];
+        snprintf(suffix, sizeof(suffix), "%s.%s", class_name, ENTRY_POINTS[ei]);
+
+        const cbm_gbuf_node_t **nodes = NULL;
+        int count = 0;
+        cbm_gbuf_find_by_name(ctx->gbuf, ENTRY_POINTS[ei], (const cbm_gbuf_node_t ***)&nodes,
+                              &count);
+        for (int ni = 0; ni < count; ni++) {
+            if (nodes[ni]->qualified_name &&
+                qn_ends_with_segment(nodes[ni]->qualified_name, suffix))
+                return nodes[ni]->id;
+        }
+    }
+    return 0;
+}
+
+static void emit_route(cbm_pipeline_ctx_t *ctx, int64_t src_id, 
const ens_item_t *item, + const char *via, double confidence, const char *production_class) { + int64_t tgt_id = find_entry_point(ctx, item->class_name); + if (!tgt_id) { + char cls_qn[CBM_SZ_512]; + snprintf(cls_qn, sizeof(cls_qn), "%s.%s", production_class, item->item_name); + const cbm_gbuf_node_t *cls = cbm_gbuf_find_by_qn(ctx->gbuf, cls_qn); + if (!cls) + return; + tgt_id = cls->id; + confidence -= 0.10; + } + char conf_str[32]; + snprintf(conf_str, sizeof(conf_str), "%.2f", confidence); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), + "{\"via\":\"%s\",\"production\":\"%s\",\"item_name\":\"%s\"," + "\"confidence\":%s,\"enabled\":%s}", + via, production_class, item->item_name, conf_str, item->enabled ? "true" : "false"); + cbm_gbuf_insert_edge(ctx->gbuf, src_id, tgt_id, "ROUTES_TO", props); +} + +/* Scan a .cls source file for SendRequestSync call targets and + * InitialExpression values for a given method/property name. */ +static void scan_source_for_send_targets(const char *source, const char *method_name, + char *literal_out, int lit_sz, char *prop_name_out, + int prop_sz) { + literal_out[0] = '\0'; + prop_name_out[0] = '\0'; + if (!source || !method_name) + return; + + const char *p = source; + while ((p = strstr(p, "SendRequestSync")) != NULL) { + p += 15; + while (*p == ' ' || *p == '\t') + p++; + if (*p != '(') + continue; + p++; + while (*p == ' ' || *p == '\t') + p++; + + if (*p == '"') { + const char *ns = p + 1, *ne = strchr(ns, '"'); + if (ne) { + int len = (int)(ne - ns); + if (len > 0 && len < lit_sz) { + memcpy(literal_out, ns, (size_t)len); + literal_out[len] = '\0'; + return; + } + } + } else if (p[0] == '.' 
&& p[1] == '.') { + const char *ps = p + 2; + int plen = 0; + while (ps[plen] && (isalnum((unsigned char)ps[plen]) || ps[plen] == '_')) + plen++; + if (plen > 0 && plen < prop_sz) { + memcpy(prop_name_out, ps, (size_t)plen); + prop_name_out[plen] = '\0'; + return; + } + } + } + (void)method_name; +} + +/* Find InitialExpression value for a Property in the source. */ +static void scan_initial_expression(const char *source, const char *prop_name, char *out, + int outsz) { + out[0] = '\0'; + if (!source || !prop_name) + return; + char needle[CBM_SZ_256]; + snprintf(needle, sizeof(needle), "Property %s ", prop_name); + const char *p = strstr(source, needle); + if (!p) { + snprintf(needle, sizeof(needle), "Property %s[", prop_name); + p = strstr(source, needle); + } + if (!p) + return; + const char *ie = strstr(p, "InitialExpression ="); + if (!ie) + return; + ie = strchr(ie, '"'); + if (!ie) + return; + ie++; + const char *ie_end = strchr(ie, '"'); + if (!ie_end) + return; + int len = (int)(ie_end - ie); + if (len >= outsz) + len = outsz - 1; + memcpy(out, ie, (size_t)len); + out[len] = '\0'; +} + +static void collect_prod_defs(cbm_pipeline_ctx_t *ctx, ens_prod_def_t ***defs_out, int *count_out) { + const cbm_gbuf_node_t **xdata_nodes = NULL; + int xdata_count = 0; + cbm_gbuf_find_by_label(ctx->gbuf, "XData", (const cbm_gbuf_node_t ***)&xdata_nodes, + &xdata_count); + + ens_prod_def_t **defs = NULL; + int n = 0; + + for (int xi = 0; xi < xdata_count; xi++) { + const cbm_gbuf_node_t *xd = xdata_nodes[xi]; + if (!xd->name || strcmp(xd->name, "ProductionDefinition") != 0) + continue; + if (!xd->file_path || !ctx->repo_path) + continue; + + char full_path[CBM_SZ_1K]; + snprintf(full_path, sizeof(full_path), "%s/%s", ctx->repo_path, xd->file_path); + + char *source = read_file(full_path); + if (!source) + continue; + + char class_qn[CBM_SZ_256]; + class_qn[0] = '\0'; + if (xd->qualified_name) { + const char *dot = strrchr(xd->qualified_name, '.'); + if (dot) { + int len = 
(int)(dot - xd->qualified_name); + if (len > 0 && len < CBM_SZ_256) { + memcpy(class_qn, xd->qualified_name, (size_t)len); + class_qn[len] = '\0'; + } + } + } + if (!class_qn[0]) { + free(source); + continue; + } + + const char *xml_start = strstr(source, "file_path); + free(source); + if (!def) + continue; + + char n_items_buf[32]; + snprintf(n_items_buf, sizeof(n_items_buf), "%d", def->n_items); + cbm_log_info("ensemble_routing.parse", "class", class_qn, "items", n_items_buf); + + for (int i = 0; i < def->n_items; i++) { + ens_item_t *item = &def->items[i]; + char item_qn[CBM_SZ_512]; + snprintf(item_qn, sizeof(item_qn), "%s.%s", class_qn, item->item_name); + char iprops[CBM_SZ_512]; + snprintf(iprops, sizeof(iprops), + "{\"class_name\":\"%s\",\"enabled\":%s,\"production\":\"%s\"}", + item->class_name, item->enabled ? "true" : "false", class_qn); + cbm_gbuf_upsert_node(ctx->gbuf, "EnsembleItem", item->item_name, item_qn, xd->file_path, + xd->start_line, 0, iprops); + } + + ens_prod_def_t **tmp = realloc(defs, (size_t)(n + 1) * sizeof(ens_prod_def_t *)); + if (!tmp) { + free(def); + continue; + } + defs = tmp; + defs[n++] = def; + } + *defs_out = defs; + *count_out = n; +} + +static bool method_belongs_to_production(const cbm_gbuf_node_t *method, const ens_prod_def_t *def) { + if (!method->properties_json) + return false; + char parent_class[CBM_SZ_512]; + if (!jstr(method->properties_json, "parent_class", parent_class, sizeof(parent_class))) + return false; + for (int i = 0; i < def->n_items; i++) { + /* parent_class may carry a project/package prefix; match the class as a + * trailing dotted segment rather than a bare substring to avoid e.g. + * "Service" matching "MyDataService". 
*/ + if (qn_ends_with_segment(parent_class, def->items[i].class_name)) + return true; + } + return false; +} + +static void resolve_method_routes(cbm_pipeline_ctx_t *ctx, const cbm_gbuf_node_t *method, + const char *source, const ens_prod_def_t *def) { + if (!method->properties_json) + return; + if (!method_belongs_to_production(method, def)) + return; + if (!strstr(source, "SendRequestSync")) + return; + + char literal[CBM_SZ_256], prop_name[CBM_SZ_256]; + scan_source_for_send_targets(source, method->name, literal, sizeof(literal), prop_name, + sizeof(prop_name)); + + if (literal[0]) { + const ens_item_t *item = find_item(def, literal); + if (item) + emit_route(ctx, method->id, item, "literal", CONF_LITERAL, def->production_class); + } else if (prop_name[0]) { + char init_expr[CBM_SZ_256]; + scan_initial_expression(source, prop_name, init_expr, sizeof(init_expr)); + if (init_expr[0]) { + const ens_item_t *item = find_item(def, init_expr); + if (item) + emit_route(ctx, method->id, item, prop_name, CONF_PROP, def->production_class); + } + } +} + +#define CONF_WORKMGR 0.90 + +/* Scan source for WorkMgr parallel dispatch: .Queue("##class(X).method", ...) + * Pattern: any receiver .Queue() call where first arg is "##class(Cls).Meth" */ +static void scan_workmgr_dispatch(cbm_pipeline_ctx_t *ctx, const cbm_gbuf_node_t *method, + const char *source) { + if (!source) + return; + const char *p = source; + const char *needle = ".Queue(\"##class("; + while ((p = strstr(p, needle)) != NULL) { + p += strlen(needle); + /* Extract class name up to ')' */ + const char *cls_end = strchr(p, ')'); + if (!cls_end) + continue; + int cls_len = (int)(cls_end - p); + if (cls_len <= 0 || cls_len >= CBM_SZ_256) { + p = cls_end; + continue; + } + char cls_name[CBM_SZ_256]; + memcpy(cls_name, p, (size_t)cls_len); + cls_name[cls_len] = '\0'; + + /* Expect '.' 
after ')' then method name up to '"' */ + const char *dot = cls_end + 1; + if (*dot != '.') { + p = dot; + continue; + } + const char *meth_start = dot + 1; + const char *meth_end = strchr(meth_start, '"'); + if (!meth_end) + continue; + int meth_len = (int)(meth_end - meth_start); + if (meth_len <= 0 || meth_len >= CBM_SZ_256) { + p = meth_end; + continue; + } + char meth_name[CBM_SZ_256]; + memcpy(meth_name, meth_start, (size_t)meth_len); + meth_name[meth_len] = '\0'; + + /* Find the target method in the gbuf by name within cls_name */ + char target_qn_suffix[CBM_SZ_512]; + snprintf(target_qn_suffix, sizeof(target_qn_suffix), "%s.%s", cls_name, meth_name); + + const cbm_gbuf_node_t **candidates = NULL; + int ccount = 0; + cbm_gbuf_find_by_name(ctx->gbuf, meth_name, (const cbm_gbuf_node_t ***)&candidates, + &ccount); + for (int ci = 0; ci < ccount; ci++) { + if (candidates[ci]->qualified_name && + qn_ends_with_segment(candidates[ci]->qualified_name, target_qn_suffix)) { + char props[CBM_SZ_256]; + snprintf(props, sizeof(props), "{\"via\":\"WorkMgr.Queue\",\"confidence\":%.2f}", + CONF_WORKMGR); + cbm_gbuf_insert_edge(ctx->gbuf, method->id, candidates[ci]->id, "CALLS", props); + break; + } + } + p = meth_end; + } +} + +void cbm_pipeline_pass_ensemble_routing(cbm_pipeline_ctx_t *ctx) { + if (!ctx || !ctx->gbuf || !ctx->repo_path) + return; + + const cbm_gbuf_node_t **method_nodes = NULL; + int method_count = 0; + cbm_gbuf_find_by_label(ctx->gbuf, "Method", (const cbm_gbuf_node_t ***)&method_nodes, + &method_count); + + /* Pass A: WorkMgr parallel dispatch — CALLS edges, independent of productions */ + int workmgr_edges = 0; + char last_path[CBM_SZ_1K] = {0}; + char *last_source = NULL; + for (int mi = 0; mi < method_count; mi++) { + const cbm_gbuf_node_t *m = method_nodes[mi]; + if (!m->file_path) + continue; + char full_path[CBM_SZ_1K]; + snprintf(full_path, sizeof(full_path), "%s/%s", ctx->repo_path, m->file_path); + if (strcmp(full_path, last_path) != 0) { + 
free(last_source); + last_source = read_file(full_path); + snprintf(last_path, sizeof(last_path), "%s", full_path); + } + if (!last_source || !strstr(last_source, ".Queue(\"##class(")) + continue; + /* Scope scan to this method's line range to avoid cross-method false positives */ + char *method_slice = NULL; + if (m->start_line > 0 && m->end_line >= m->start_line) { + const char *p = last_source; + int line = 1; + const char *method_start = NULL, *method_end = NULL; + while (*p) { + if (line == m->start_line) + method_start = p; + if (line == m->end_line + 1) { + method_end = p; + break; + } + if (*p == '\n') + line++; + p++; + } + if (!method_end) + method_end = p; + if (method_start && method_end > method_start) { + int slen = (int)(method_end - method_start); + method_slice = malloc((size_t)slen + 1); + if (method_slice) { + memcpy(method_slice, method_start, (size_t)slen); + method_slice[slen] = '\0'; + } + } + } + const char *scan_src = method_slice ? method_slice : last_source; + int before_w = cbm_gbuf_edge_count_by_type(ctx->gbuf, "CALLS"); + scan_workmgr_dispatch(ctx, m, scan_src); + free(method_slice); + workmgr_edges += cbm_gbuf_edge_count_by_type(ctx->gbuf, "CALLS") - before_w; + } + free(last_source); + if (workmgr_edges > 0) { + char wbuf[32]; + snprintf(wbuf, sizeof(wbuf), "%d", workmgr_edges); + cbm_log_info("ensemble_routing.workmgr", "edges", wbuf); + } + + /* Pass B: Ensemble production routing — ROUTES_TO edges */ + ens_prod_def_t **defs = NULL; + int n_defs = 0; + collect_prod_defs(ctx, &defs, &n_defs); + if (n_defs == 0) + return; + + int before = cbm_gbuf_edge_count_by_type(ctx->gbuf, "ROUTES_TO"); + + /* 1-deep file cache shared across the def/method loops below: the same .cls + * source is re-visited once per method per production, so caching the last + * file read avoids re-reading it from disk each time (mirrors Pass A). 
*/ + char rb_last_path[CBM_SZ_1K] = {0}; + char *rb_last_source = NULL; + + for (int di = 0; di < n_defs; di++) { + ens_prod_def_t *def = defs[di]; + + for (int mi = 0; mi < method_count; mi++) { + const cbm_gbuf_node_t *m = method_nodes[mi]; + if (!m->properties_json || !m->file_path) + continue; + if (!method_belongs_to_production(m, def)) + continue; + + char meth_full_path[CBM_SZ_1K]; + snprintf(meth_full_path, sizeof(meth_full_path), "%s/%s", ctx->repo_path, m->file_path); + if (strcmp(meth_full_path, rb_last_path) != 0) { + free(rb_last_source); + rb_last_source = read_file(meth_full_path); + snprintf(rb_last_path, sizeof(rb_last_path), "%s", meth_full_path); + } + if (!rb_last_source) + continue; + resolve_method_routes(ctx, m, rb_last_source, def); + } + + for (int ii = 0; ii < def->n_items; ii++) { + const ens_item_t *item = &def->items[ii]; + for (int si = 0; si < item->n_settings; si++) { + const ens_setting_t *setting = &item->settings[si]; + if (!setting->value[0]) + continue; + const ens_item_t *target = find_item(def, setting->value); + if (!target) + continue; + char item_qn[CBM_SZ_512]; + snprintf(item_qn, sizeof(item_qn), "%s.%s", def->production_class, item->item_name); + const cbm_gbuf_node_t *item_node = cbm_gbuf_find_by_qn(ctx->gbuf, item_qn); + if (!item_node) + continue; + emit_route(ctx, item_node->id, target, setting->setting_name, CONF_PROP, + def->production_class); + } + } + + free(defs[di]); + } + free(rb_last_source); + free(defs); + + int routes = cbm_gbuf_edge_count_by_type(ctx->gbuf, "ROUTES_TO") - before; + char n_defs_buf[32], n_routes_buf[32]; + snprintf(n_defs_buf, sizeof(n_defs_buf), "%d", n_defs); + snprintf(n_routes_buf, sizeof(n_routes_buf), "%d", routes); + cbm_log_info("ensemble_routing.done", "productions", n_defs_buf, "routes", n_routes_buf); +} diff --git a/src/pipeline/pass_ensemble_routing.h b/src/pipeline/pass_ensemble_routing.h new file mode 100644 index 00000000..3017bf6a --- /dev/null +++ 
b/src/pipeline/pass_ensemble_routing.h @@ -0,0 +1,8 @@ +#ifndef CBM_PASS_ENSEMBLE_ROUTING_H +#define CBM_PASS_ENSEMBLE_ROUTING_H + +#include "pipeline/pipeline_internal.h" + +void cbm_pipeline_pass_ensemble_routing(cbm_pipeline_ctx_t *ctx); + +#endif /* CBM_PASS_ENSEMBLE_ROUTING_H */ diff --git a/src/pipeline/pass_parallel.c b/src/pipeline/pass_parallel.c index 180ee85f..b480b183 100644 --- a/src/pipeline/pass_parallel.c +++ b/src/pipeline/pass_parallel.c @@ -66,6 +66,9 @@ enum { PP_CSHARP_M_PREFIX_LEN = 2 }; #include "foundation/profile.h" #include "foundation/compat_regex.h" #include "cbm.h" +#include "arena.h" +#include "macro_table.h" +#include "iris_export_xml.h" #include "simhash/minhash.h" #include "semantic/ast_profile.h" @@ -485,6 +488,9 @@ typedef struct { cbm_pkg_entries_t *pkg_entries; /* per-worker manifest arrays (separate allocation) */ _Atomic int64_t retained_bytes; /* total source bytes copied into result arenas */ + + const CBMMacroTable *macro_table; /* ObjectScript $$$macros (NULL if none) */ + const CBMReturnTypeTable *return_type_table; /* ObjectScript return types (NULL if none) */ } extract_ctx_t; /* Insert one definition node (and its route if present) into the local gbuf. */ @@ -586,8 +592,38 @@ static void extract_worker(int worker_id, void *ctx_ptr) { uint64_t file_t0 = extract_now_ns(); - CBMFileResult *result = cbm_extract_file(source, source_len, fi->language, ec->project_name, - fi->rel_path, CBM_EXTRACT_BUDGET, NULL, NULL); + /* ObjectScript Studio Export XML: transcode each to UDL and + * extract directly into the local gbuf (the per-file cache holds a single + * result, so multi-class Export files are processed inline here). 
*/ + if (fi->language == CBM_LANG_OBJECTSCRIPT_EXPORT) { + CBMArena ea; + cbm_arena_init(&ea); + int cc = 0; + char **udls = cbm_iris_export_to_udl(&ea, source, source_len, &cc); + for (int ci = 0; ci < cc; ci++) { + CBMFileResult *xr = + cbm_extract_file_ex(udls[ci], (int)strlen(udls[ci]), CBM_LANG_OBJECTSCRIPT_UDL, + ec->project_name, fi->rel_path, CBM_EXTRACT_BUDGET, NULL, + NULL, ec->macro_table, ec->return_type_table); + if (!xr) { + continue; + } + for (int d = 0; d < xr->defs.count; d++) { + CBMDefinition *def = &xr->defs.items[d]; + if (def->qualified_name && def->name) { + insert_def_into_gbuf(ws, fi, def); + } + } + cbm_free_result(xr); + } + cbm_arena_destroy(&ea); + free_source(source); + continue; + } + + CBMFileResult *result = cbm_extract_file_ex( + source, source_len, fi->language, ec->project_name, fi->rel_path, CBM_EXTRACT_BUDGET, + NULL, NULL, ec->macro_table, ec->return_type_table); uint64_t file_elapsed_ms = (extract_now_ns() - file_t0) / PP_USEC_PER_MS; @@ -707,6 +743,55 @@ static void log_extract_mem_stats(int worker_count) { } } +/* ObjectScript: build the $$$macro table from .inc files (parallel path). + * Returns NULL when no ObjectScript include files exist. Caller owns it. 
*/ +static CBMMacroTable *pp_build_macro_table(const cbm_file_info_t *files, int count) { + bool has_inc = false; + for (int i = 0; i < count; i++) { + if (files[i].language == CBM_LANG_OBJECTSCRIPT_ROUTINE && files[i].path && + strstr(files[i].path, ".inc")) { + has_inc = true; + break; + } + } + if (!has_inc) { + return NULL; + } + CBMMacroTable *mt = (CBMMacroTable *)calloc(1, sizeof(CBMMacroTable)); + if (!mt) { + return NULL; + } + CBMArena mt_arena; + cbm_arena_init(&mt_arena); + cbm_macro_table_init_system(mt); + for (int i = 0; i < count; i++) { + if (files[i].language != CBM_LANG_OBJECTSCRIPT_ROUTINE) { + continue; + } + if (!files[i].path || !strstr(files[i].path, ".inc")) { + continue; + } + FILE *f = fopen(files[i].path, "rb"); + if (!f) { + continue; + } + fseek(f, 0, SEEK_END); + long fsize = ftell(f); + rewind(f); + if (fsize > 0) { + char *src = (char *)malloc((size_t)fsize + 1); + if (src) { + size_t nread = fread(src, 1, (size_t)fsize, f); + src[nread] = '\0'; + cbm_parse_inc_file(mt, &mt_arena, src); + free(src); + } + } + (void)fclose(f); + } + return mt; +} + int cbm_parallel_extract(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count, CBMFileResult **result_cache, _Atomic int64_t *shared_ids, int worker_count) { @@ -756,6 +841,9 @@ int cbm_parallel_extract(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, /* Per-worker manifest entry arrays (separate from cache-line-aligned worker state) */ cbm_pkg_entries_t *pkg_entries = calloc(worker_count, sizeof(cbm_pkg_entries_t)); + /* ObjectScript macro table (NULL when no .inc include files present). 
*/ + CBMMacroTable *pp_macro_table = pp_build_macro_table(files, file_count); + extract_ctx_t ec = { .files = files, .sorted = sorted, @@ -768,6 +856,8 @@ int cbm_parallel_extract(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, .shared_ids = shared_ids, .cancelled = ctx->cancelled, .pkg_entries = pkg_entries, + .macro_table = pp_macro_table, + .return_type_table = ctx->return_type_table, }; atomic_init(&ec.next_worker_id, 0); atomic_init(&ec.next_file_idx, 0); @@ -797,6 +887,7 @@ int cbm_parallel_extract(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, cbm_aligned_free(workers); free(sorted); + free(pp_macro_table); /* ObjectScript macro table (NULL-safe) */ if (atomic_load(ctx->cancelled)) { return CBM_NOT_FOUND; diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index c080a285..3fa0b63f 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -18,9 +18,12 @@ enum { CBM_DIR_PERMS = 0755, PL_RING = 4, PL_RING_MASK = 3, PL_SEQ_PASSES = 6, P #include "pipeline/artifact.h" #include "pipeline/pipeline_internal.h" #include "pipeline/pass_lsp_cross.h" +#include "pipeline/pass_ensemble_routing.h" #include "pipeline/worker_pool.h" #include "graph_buffer/graph_buffer.h" #include "store/store.h" +#include "macro_table.h" +#include "arena.h" #include "discover/discover.h" #include "discover/userconfig.h" #include "foundation/platform.h" @@ -489,6 +492,9 @@ static void predump_cfg(cbm_pipeline_ctx_t *ctx) { static void predump_complexity(cbm_pipeline_ctx_t *ctx) { cbm_pipeline_pass_complexity(ctx); } +static void predump_ensemble(cbm_pipeline_ctx_t *ctx) { + cbm_pipeline_pass_ensemble_routing(ctx); +} static void run_predump_passes(cbm_pipeline_t *p, cbm_pipeline_ctx_t *ctx) { static const struct { @@ -496,11 +502,12 @@ static void run_predump_passes(cbm_pipeline_t *p, cbm_pipeline_ctx_t *ctx) { const char *name; bool moderate_only; /* true = skip in fast mode */ } passes[] = { - {predump_deco, "decorator_tags", false}, {predump_cfg, 
"configlink", false}, - {predump_route, "route_match", false}, {predump_sim, "similarity", true}, - {predump_sem, "semantic_edges", true}, {predump_complexity, "complexity", false}, + {predump_deco, "decorator_tags", false}, {predump_cfg, "configlink", false}, + {predump_route, "route_match", false}, {predump_sim, "similarity", true}, + {predump_sem, "semantic_edges", true}, {predump_complexity, "complexity", false}, + {predump_ensemble, "ensemble_routing", false}, }; - enum { PREDUMP_PASS_COUNT = 6 }; + enum { PREDUMP_PASS_COUNT = 7 }; struct timespec t; for (int i = 0; i < PREDUMP_PASS_COUNT && !check_cancel(p); i++) { /* "moderate_only" passes (similarity/semantic edges) run in FULL, @@ -533,6 +540,61 @@ static int seq_pass_lsp_cross_dispatch(cbm_pipeline_ctx_t *ctx, const cbm_file_i } /* Run the sequential pipeline path: definitions, k8s, lsp_cross, calls, usages, semantic. */ +/* Build the ObjectScript $$$macro table from .inc include files in the repo. + * Returns NULL (and does no work) when no ObjectScript include files exist. + * Caller owns the returned heap table. 
*/
+static CBMMacroTable *cbm_build_macro_table_from_files(const cbm_file_info_t *files, int count,
+                                                       const char *repo_path) {
+    (void)repo_path; /* accepted for interface symmetry; not used */
+
+    /* An include file is a routine whose path ENDS in ".inc". Matching the
+     * substring anywhere would misclassify paths like "my.includes/foo.mac". */
+    static const char suffix[] = ".inc";
+    const size_t suffix_len = sizeof(suffix) - 1;
+
+    CBMMacroTable *table = NULL; /* created on the first include file found */
+    CBMArena arena;              /* initialised together with table */
+
+    for (int idx = 0; idx < count; idx++) {
+        const cbm_file_info_t *fi = &files[idx];
+        if (fi->language != CBM_LANG_OBJECTSCRIPT_ROUTINE || !fi->path) {
+            continue;
+        }
+        size_t plen = strlen(fi->path);
+        if (plen < suffix_len || strcmp(fi->path + plen - suffix_len, suffix) != 0) {
+            continue;
+        }
+
+        if (!table) {
+            table = (CBMMacroTable *)calloc(1, sizeof(CBMMacroTable));
+            if (!table) {
+                return NULL;
+            }
+            /* NOTE(review): the arena is never destroyed here; presumably the
+             * table references arena-owned strings -- confirm the intended
+             * owner, since only the table itself is heap-returned. */
+            cbm_arena_init(&arena);
+            cbm_macro_table_init_system(table);
+        }
+
+        FILE *fp = fopen(fi->path, "rb");
+        if (!fp) {
+            continue; /* best-effort: an unreadable include is skipped */
+        }
+        /* Determine the size; a failed seek/tell leaves size <= 0 and skips the read. */
+        long size = -1;
+        if (fseek(fp, 0, SEEK_END) == 0) {
+            size = ftell(fp);
+        }
+        if (size > 0 && fseek(fp, 0, SEEK_SET) == 0) {
+            char *text = (char *)malloc((size_t)size + 1);
+            if (text) {
+                /* NUL-terminate at the byte count actually read. */
+                size_t got = fread(text, 1, (size_t)size, fp);
+                text[got] = '\0';
+                cbm_parse_inc_file(table, &arena, text);
+                free(text);
+            }
+        }
+        (void)fclose(fp);
+    }
+    return table; /* NULL when the repo has no .inc include files */
+}
+
 static int run_sequential_pipeline(cbm_pipeline_t *p, cbm_pipeline_ctx_t *ctx,
                                    const cbm_file_info_t *files, int file_count,
                                    struct timespec *t) {
@@ -549,6 +611,13 @@ static int run_sequential_pipeline(cbm_pipeline_t *p, cbm_pipeline_ctx_t *ctx,
     if (seq_cache) {
         ctx->result_cache = seq_cache;
     }
+
+    /* ObjectScript: build the $$$macro table from .inc include files so that
+     * pass_calls can resolve macro-mediated dispatch. NULL when not present.
*/ + CBMMacroTable *mt = cbm_build_macro_table_from_files(files, file_count, ctx->repo_path); + if (mt) { + ctx->macro_table = mt; + } typedef int (*seq_pass_fn)(cbm_pipeline_ctx_t *, const cbm_file_info_t *, int); static const struct { seq_pass_fn fn; @@ -592,6 +661,15 @@ static int run_sequential_pipeline(cbm_pipeline_t *p, cbm_pipeline_ctx_t *ctx, free(seq_cache); ctx->result_cache = NULL; } + /* ObjectScript: free the macro / return-type tables built for this run. */ + if (ctx->macro_table) { + free((void *)ctx->macro_table); + ctx->macro_table = NULL; + } + if (ctx->return_type_table) { + free((void *)ctx->return_type_table); + ctx->return_type_table = NULL; + } return rc; } diff --git a/src/pipeline/pipeline_internal.h b/src/pipeline/pipeline_internal.h index 1eb10842..0beb2f57 100644 --- a/src/pipeline/pipeline_internal.h +++ b/src/pipeline/pipeline_internal.h @@ -69,6 +69,14 @@ typedef struct { * configs are an easy follow-on). NULL when no usable configs were found. * Owned by pipeline.c / pipeline_incremental.c. */ const cbm_path_alias_collection_t *path_aliases; + + /* ObjectScript $$$macro table built from .inc files in the repo (NULL if + * no ObjectScript include files were found). Owned by pipeline.c. */ + const CBMMacroTable *macro_table; + + /* ObjectScript method-return-type table built from extracted definitions + * (NULL until pass_calls builds it). Owned by pipeline.c. */ + const CBMReturnTypeTable *return_type_table; } cbm_pipeline_ctx_t; /* Get the current pipeline's package map (NULL if none). 
*/ diff --git a/tests/test_extraction.c b/tests/test_extraction.c index 0308372c..4006d1a2 100644 --- a/tests/test_extraction.c +++ b/tests/test_extraction.c @@ -7,6 +7,8 @@ */ #include "test_framework.h" #include "cbm.h" +#include "macro_table.h" +#include "iris_export_xml.h" /* ── Helpers ───────────────────────────────────────────────────── */ @@ -64,6 +66,14 @@ static CBMFileResult *extract(const char *src, CBMLanguage lang, const char *pro return r; } +/* As extract(), but threads an ObjectScript macro table through. */ +static CBMFileResult *extract_with_macros(const char *src, CBMLanguage lang, const char *proj, + const char *path, const CBMMacroTable *mt) { + CBMFileResult *r = + cbm_extract_file_ex(src, (int)strlen(src), lang, proj, path, 0, NULL, NULL, mt, NULL); + return r; +} + /* ═══════════════════════════════════════════════════════════════════ * Group A: OOP Languages * ═══════════════════════════════════════════════════════════════════ */ @@ -2818,10 +2828,784 @@ TEST(complexity_access_depth_and_params) { * Suite * ═══════════════════════════════════════════════════════════════════ */ +/* =================================================================== + * Group H3: ObjectScript return type extraction + * =================================================================== */ + +TEST(objectscript_udl_method_return_type) { + CBMFileResult *r = extract("Class MyApp.Factory Extends %RegisteredObject\n" + "{\n" + "Method GetAdapter() As EnsLib.SQL.OutboundAdapter\n" + "{\n" + " Quit ##class(EnsLib.SQL.OutboundAdapter).%New()\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Factory.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + bool found_rt = false; + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].name, "GetAdapter") == 0) { + ASSERT_NOT_NULL(r->defs.items[i].return_type); + ASSERT(strstr(r->defs.items[i].return_type, "EnsLib.SQL.OutboundAdapter") != NULL); + found_rt = true; + } + } + ASSERT(found_rt); + 
cbm_free_result(r);
+    PASS();
+}
+
+/* A method declared `As %String` must carry a return_type containing "%String". */
+TEST(objectscript_udl_scalar_return_type_not_resolved) {
+    CBMFileResult *r = extract("Class MyApp.Counter Extends %RegisteredObject\n"
+                               "{\n"
+                               "Method GetName() As %String\n"
+                               "{\n"
+                               " Quit \"hello\"\n"
+                               "}\n"
+                               "}\n",
+                               CBM_LANG_OBJECTSCRIPT_UDL, "t", "Counter.cls");
+    ASSERT_NOT_NULL(r);
+    ASSERT_FALSE(r->has_error);
+    bool found = false;
+    for (int i = 0; i < r->defs.count; i++) {
+        if (strcmp(r->defs.items[i].name, "GetName") == 0) {
+            ASSERT_NOT_NULL(r->defs.items[i].return_type);
+            ASSERT(strstr(r->defs.items[i].return_type, "%String") != NULL);
+            found = true;
+        }
+    }
+    /* Without this the test would pass vacuously when GetName is never extracted. */
+    ASSERT(found);
+    cbm_free_result(r);
+    PASS();
+}
+
+/* ===================================================================
+ * Group H1: ObjectScript class / member / call extraction
+ * =================================================================== */
+
+/* An empty class yields a "Class" definition named by its full dotted name. */
+TEST(objectscript_udl_class) {
+    CBMFileResult *r = extract("Class MyApp.Patient Extends %Persistent\n{\n}\n",
+                               CBM_LANG_OBJECTSCRIPT_UDL, "t", "Patient.cls");
+    ASSERT_NOT_NULL(r);
+    ASSERT_FALSE(r->has_error);
+    ASSERT(has_def(r, "Class", "MyApp.Patient"));
+    cbm_free_result(r);
+    PASS();
+}
+
+/* Methods that follow a method containing a bare label line ("Done") must
+ * still be extracted. */
+TEST(objectscript_udl_methods_after_goto_label) {
+    CBMFileResult *r = extract("Class Graph.KG.Test Extends %RegisteredObject\n"
+                               "{\n"
+                               "ClassMethod First() As %String\n"
+                               "{\n"
+                               " If 1 { Goto Done }\n"
+                               "Done\n"
+                               " Quit \"x\"\n"
+                               "}\n"
+                               "ClassMethod Second() As %String\n"
+                               "{\n"
+                               " Quit \"y\"\n"
+                               "}\n"
+                               "ClassMethod Third() As %String\n"
+                               "{\n"
+                               " Quit \"z\"\n"
+                               "}\n"
+                               "}\n",
+                               CBM_LANG_OBJECTSCRIPT_UDL, "t", "Test.cls");
+    ASSERT_NOT_NULL(r);
+    ASSERT_FALSE(r->has_error);
+    ASSERT(has_def(r, "Class", "Graph.KG.Test"));
+    ASSERT(has_def(r, "Method", "First"));
+    ASSERT(has_def(r, "Method", "Second"));
+    ASSERT(has_def(r, "Method", "Third"));
+    cbm_free_result(r);
+    PASS();
+}
+
+TEST(objectscript_udl_methods) {
+    CBMFileResult *r = extract("Class MyApp.Utils Extends %RegisteredObject\n"
+                               "{\n"
+                               "ClassMethod Format(pVal As %String) As %String\n"
+                               "{\n"
+ " Quit pVal\n" + "}\n" + "Method Save() As %Status\n" + "{\n" + " Quit ..%Save()\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Utils.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Class", "MyApp.Utils")); + ASSERT(has_def(r, "Method", "Format")); + ASSERT(has_def(r, "Method", "Save")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_base_classes) { + CBMFileResult *r = extract("Class MyApp.Patient Extends %Persistent\n" + "{\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Patient.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Class", "MyApp.Patient")); + int found = 0; + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].name, "MyApp.Patient") == 0) { + found = 1; + ASSERT_NOT_NULL(r->defs.items[i].base_classes); + ASSERT_NOT_NULL(r->defs.items[i].base_classes[0]); + ASSERT_STR_EQ(r->defs.items[i].base_classes[0], "%Persistent"); + } + } + ASSERT_TRUE(found); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_multiple_bases) { + CBMFileResult *r = extract("Class MyApp.Dual Extends (MyApp.Base, %RegisteredObject)\n" + "{\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Dual.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + int found = 0; + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].name, "MyApp.Dual") == 0) { + found = 1; + ASSERT_NOT_NULL(r->defs.items[i].base_classes); + ASSERT_NOT_NULL(r->defs.items[i].base_classes[0]); + ASSERT_NOT_NULL(r->defs.items[i].base_classes[1]); + } + } + ASSERT_TRUE(found); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_properties) { + CBMFileResult *r = extract("Class MyApp.Patient Extends %Persistent\n" + "{\n" + "Property Name As %String;\n" + "Property DOB As %Date;\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Patient.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Class", "MyApp.Patient")); + ASSERT(has_def(r, "Variable", "Name")); 
+ ASSERT(has_def(r, "Variable", "DOB")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_routine_tags) { + CBMFileResult *r = extract("UTILS\n" + " Quit\n" + "\n" + "Format(value,fmt)\n" + " Set result = $ZDate(value, fmt)\n" + " Quit result\n" + "\n" + "Log(msg)\n" + " Write msg,!\n" + " Quit\n", + CBM_LANG_OBJECTSCRIPT_ROUTINE, "t", "Utils.mac"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Function", "Format")); + ASSERT(has_def(r, "Function", "Log")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_query_member) { + CBMFileResult *r = + extract("Class MyApp.Repo Extends %Persistent\n" + "{\n" + "Query FindAll(name As %String) As %SQLQuery { SELECT * FROM MyApp_Repo }\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Repo.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Class", "MyApp.Repo")); + ASSERT(has_def(r, "Method", "FindAll")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_index_member) { + CBMFileResult *r = extract("Class MyApp.Repo Extends %Persistent\n" + "{\n" + "Property Name As %String;\n" + "Index NameIdx On Name;\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Repo.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Index", "NameIdx")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_xdata_member) { + CBMFileResult *r = extract("Class MyApp.Service Extends %CSP.REST\n" + "{\n" + "XData UrlMap { }\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Service.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "XData", "UrlMap")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_trigger_member) { + CBMFileResult *r = extract("Class MyApp.Log Extends %Persistent\n" + "{\n" + "Trigger AfterInsert [ Event = INSERT ] { }\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Log.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Trigger", "AfterInsert")); + 
cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_trigger_body_quit) { + CBMFileResult *r = extract("Class MyApp.Patient Extends %Persistent\n" + "{\n" + "Trigger OnDeleteSQL [ Event = DELETE, Time = AFTER ] {\n" + " Quit\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Patient.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Trigger", "OnDeleteSQL")); + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].label, "Trigger") == 0 && + strcmp(r->defs.items[i].name, "OnDeleteSQL") == 0) { + ASSERT_NOT_NULL(r->defs.items[i].docstring); + ASSERT(strstr(r->defs.items[i].docstring, "trigger_body") != NULL); + ASSERT(strstr(r->defs.items[i].docstring, "Quit") != NULL); + break; + } + } + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_trigger_body_tokens) { + CBMFileResult *r = extract("Class MyApp.Order Extends %Persistent\n" + "{\n" + "Trigger AfterInsert [ Event = INSERT, Time = AFTER ] {\n" + " Set id = ..%Id()\n" + " Do ##class(MyApp.Audit).Log(id)\n" + " Quit\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Order.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Trigger", "AfterInsert")); + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].label, "Trigger") == 0 && + strcmp(r->defs.items[i].name, "AfterInsert") == 0) { + ASSERT_NOT_NULL(r->defs.items[i].docstring); + ASSERT(strstr(r->defs.items[i].docstring, "trigger_body") != NULL); + ASSERT_NOT_NULL(r->defs.items[i].body_tokens); + ASSERT(strstr(r->defs.items[i].body_tokens, "Log") != NULL || + strstr(r->defs.items[i].body_tokens, "Audit") != NULL || + strstr(r->defs.items[i].body_tokens, "id") != NULL); + break; + } + } + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_self_call_relative_dot_method) { + CBMFileResult *r = + extract("Class HS.Flash.UpdateManager Extends Ens.BusinessProcess\n" + "{\n" + "Method MakeMRNUpToDate(pRequest As HS.Message.FlashQueueUpdate) As 
%Status\n" + "{\n" + " Set tSC = ..processStreamlet(pSession, pTS, tMPIID, tSourceMRN, ii)\n" + " Quit tSC\n" + "}\n" + "Method processStreamlet(pSession As %Integer) As %Status\n" + "{\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "UpdateManager.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Method", "MakeMRNUpToDate")); + ASSERT(has_call(r, "HS.Flash.UpdateManager.processStreamlet")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_calls_typed_new) { + CBMFileResult *r = extract("Class MyApp.Caller Extends %RegisteredObject\n" + "{\n" + "Method Run() As %Status\n" + "{\n" + " Set adapter = ##class(EnsLib.SQL.OutboundAdapter).%New()\n" + " Do adapter.ExecuteQuery(\"SELECT 1\")\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Caller.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "EnsLib.SQL.OutboundAdapter.ExecuteQuery")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_ensemble_production_def_parses_items) { + CBMFileResult *r = + extract("Class Sample.Production Extends Ens.Production\n" + "{\n" + "XData ProductionDefinition\n" + "{\n" + "\n" + " \n" + " \n" + " \n" + " \n" + "\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Production.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "XData", "ProductionDefinition")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_ensemble_production_def_hs_settings) { + CBMFileResult *r = extract( + "Class HS.Flash.Production Extends Ens.Production\n" + "{\n" + "XData ProductionDefinition\n" + "{\n" + "\n" + " \n" + " FHIROps\n" + " PatientOps\n" + " ConformOps\n" + " \n" + "\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "HSProduction.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "XData", "ProductionDefinition")); + cbm_free_result(r); + PASS(); +} + 
+TEST(objectscript_udl_ensemble_production_def_absent_no_error) { + CBMFileResult *r = extract("Class Sample.NonProduction Extends %Persistent\n" + "{\n" + "Method DoSomething() As %Status\n" + "{\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "NonProduction.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(!has_def(r, "XData", "ProductionDefinition")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_calls_typed_param) { + CBMFileResult *r = extract("Class MyApp.Handler Extends %RegisteredObject\n" + "{\n" + "Method Process(req As Ens.Request) As %Status\n" + "{\n" + " Do req.Send()\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Handler.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "Ens.Request.Send")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_calls_typed_property) { + CBMFileResult *r = extract("Class MyApp.Service Extends Ens.BusinessService\n" + "{\n" + "Property Adapter As EnsLib.SQL.InboundAdapter;\n" + "Method OnProcessInput() As %Status\n" + "{\n" + " Do ..Adapter.ExecuteQuery(\"SELECT 1\")\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Service.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "EnsLib.SQL.InboundAdapter.ExecuteQuery")); + cbm_free_result(r); + PASS(); +} + +/* =================================================================== + * Group H2: ObjectScript macro expansion + * =================================================================== */ + +TEST(objectscript_macro_expand_system) { + CBMMacroTable mt; + cbm_macro_table_init_system(&mt); + CBMFileResult *r = extract_with_macros("Class MyApp.Caller Extends %RegisteredObject\n" + "{\n" + "Method Run(sc As %Status) As %Status\n" + "{\n" + " If $$$ISERR(sc) { Quit sc }\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Caller.cls", &mt); + ASSERT_NOT_NULL(r); + 
ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "%SYSTEM.Status.IsError")); + cbm_free_result(r); + PASS(); +} + +/* ═══════════════════════════════════════════════════════════════════ + * Group H3: ObjectScript DATA_FLOWS argument extraction + * ═══════════════════════════════════════════════════════════════════ */ + +static int find_call_args(const CBMFileResult *r, const char *callee, const char **out_arg0, + const char **out_arg1) { + if (out_arg0) + *out_arg0 = NULL; + if (out_arg1) + *out_arg1 = NULL; + for (int i = 0; i < r->calls.count; i++) { + if (strstr(r->calls.items[i].callee_name, callee)) { + if (out_arg0 && r->calls.items[i].arg_count > 0) + *out_arg0 = r->calls.items[i].args[0].expr; + if (out_arg1 && r->calls.items[i].arg_count > 1) + *out_arg1 = r->calls.items[i].args[1].expr; + return r->calls.items[i].arg_count; + } + } + return -1; +} + +TEST(objectscript_data_flows_class_method_args) { + CBMFileResult *r = extract("Class MyApp.Caller Extends %RegisteredObject\n" + "{\n" + "Method Run() As %Status\n" + "{\n" + " Set sql = \"SELECT 1\"\n" + " Do ##class(MyApp.Utils).Transform(sql, \"JSON\")\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Caller.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "MyApp.Utils.Transform")); + const char *arg0 = NULL; + const char *arg1 = NULL; + int argc = find_call_args(r, "MyApp.Utils.Transform", &arg0, &arg1); + ASSERT(argc == 2); + ASSERT_NOT_NULL(arg0); + ASSERT(strstr(arg0, "sql") != NULL); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_macro_expand_local) { + CBMMacroTable mt; + cbm_macro_table_init_system(&mt); + CBMArena arena; + cbm_arena_init(&arena); + const char *inc_content = "ROUTINE MyApp.Include [Type=INC]\n" + "#define MyCheck(%sc) ##class(MyApp.Utils).Validate(%sc)\n"; + cbm_parse_inc_file(&mt, &arena, inc_content); + CBMFileResult *r = extract_with_macros("Class MyApp.Caller Extends %RegisteredObject\n" + "{\n" + "Method Run(sc As 
%Status) As %Status\n" + "{\n" + " If $$$MyCheck(sc) { Quit $$$OK }\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Caller.cls", &mt); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "MyApp.Utils.Validate")); + cbm_free_result(r); + cbm_arena_destroy(&arena); + PASS(); +} + +TEST(objectscript_macro_constant_no_extra_call) { + CBMMacroTable mt; + cbm_macro_table_init_system(&mt); + CBMArena arena; + cbm_arena_init(&arena); + const char *inc_content = "ROUTINE MyApp.Include [Type=INC]\n" + "#define MyConst 42\n"; + cbm_parse_inc_file(&mt, &arena, inc_content); + CBMFileResult *r = extract_with_macros("Class MyApp.Caller Extends %RegisteredObject\n" + "{\n" + "Method Run() As %Integer\n" + "{\n" + " Set x = $$$MyConst\n" + " Quit x\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Caller.cls", &mt); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(!has_call(r, "$$$MyConst")); + cbm_free_result(r); + cbm_arena_destroy(&arena); + PASS(); +} + +TEST(objectscript_data_flows_instance_method_args) { + CBMFileResult *r = extract("Class MyApp.Service Extends %RegisteredObject\n" + "{\n" + "Method Run() As %Status\n" + "{\n" + " Set adapter = ##class(EnsLib.SQL.OutboundAdapter).%New()\n" + " Do adapter.ExecuteQuery(\"SELECT 1\")\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Service.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "EnsLib.SQL.OutboundAdapter.ExecuteQuery")); + const char *arg0 = NULL; + int argc = find_call_args(r, "EnsLib.SQL.OutboundAdapter.ExecuteQuery", &arg0, NULL); + ASSERT(argc == 1); + ASSERT_NOT_NULL(arg0); + cbm_free_result(r); + PASS(); +} + +/* =================================================================== + * Group H4: IRIS Export XML → UDL transcoder + * =================================================================== */ + +#define SIMPLE_EXPORT \ + "\n" \ + "\n" \ + "\n" \ + "%RegisteredObject\n" \ + "\n" \ + 
"%String\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" + +TEST(iris_export_xml_simple_class) { + CBMArena arena; + cbm_arena_init(&arena); + int count = 0; + char **udl = cbm_iris_export_to_udl(&arena, SIMPLE_EXPORT, (int)strlen(SIMPLE_EXPORT), &count); + ASSERT_NOT_NULL(udl); + ASSERT(count == 1); + ASSERT_NOT_NULL(udl[0]); + ASSERT(strstr(udl[0], "Test.Simple") != NULL); + ASSERT(strstr(udl[0], "%RegisteredObject") != NULL); + ASSERT(strstr(udl[0], "Hello") != NULL); + ASSERT(strstr(udl[0], "Quit \"hello\"") != NULL); + cbm_arena_destroy(&arena); + PASS(); +} + +#define CLASSMETHOD_EXPORT \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "1\n" \ + "pArg:%String,pFlag:%Boolean=0\n" \ + "%Status\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" + +TEST(iris_export_xml_classmethod) { + CBMArena arena; + cbm_arena_init(&arena); + int count = 0; + char **udl = + cbm_iris_export_to_udl(&arena, CLASSMETHOD_EXPORT, (int)strlen(CLASSMETHOD_EXPORT), &count); + ASSERT_NOT_NULL(udl); + ASSERT(count == 1); + ASSERT(strstr(udl[0], "ClassMethod") != NULL); + ASSERT(strstr(udl[0], "pArg") != NULL); + ASSERT(strstr(udl[0], "pFlag") != NULL); + ASSERT(strstr(udl[0], "%Status") != NULL); + cbm_arena_destroy(&arena); + PASS(); +} + +#define MEMBER_EXPORT \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "%String\n" \ + "\n" \ + "\n" \ + "\n" \ + "1\n" \ + "\n" \ + "\n" \ + "Name\n" \ + "1\n" \ + "\n" \ + "\n" \ + "\n" + +TEST(iris_export_xml_property_parameter_index) { + CBMArena arena; + cbm_arena_init(&arena); + int count = 0; + char **udl = cbm_iris_export_to_udl(&arena, MEMBER_EXPORT, (int)strlen(MEMBER_EXPORT), &count); + ASSERT_NOT_NULL(udl); + ASSERT(count == 1); + ASSERT(strstr(udl[0], "Property Name") != NULL); + ASSERT(strstr(udl[0], "%String") != NULL); + ASSERT(strstr(udl[0], "Parameter VERSION") != NULL); + ASSERT(strstr(udl[0], "Index NameIdx") != NULL); + cbm_arena_destroy(&arena); + PASS(); +} + +#define CALLS_EXPORT \ + "\n" \ + "\n" \ + "\n" \ + "%RegisteredObject\n" \ + "\n" \ + "1\n" \ + "%Status\n" 
\ + "\n" \ + "\n" \ + "\n" \ + "\n" + +TEST(iris_export_xml_calls_extracted) { + CBMArena arena; + cbm_arena_init(&arena); + int count = 0; + char **udl = cbm_iris_export_to_udl(&arena, CALLS_EXPORT, (int)strlen(CALLS_EXPORT), &count); + ASSERT_NOT_NULL(udl); + ASSERT(count == 1); + CBMFileResult *r = cbm_extract_file(udl[0], (int)strlen(udl[0]), CBM_LANG_OBJECTSCRIPT_UDL, "t", + "Caller.cls", 0, NULL, NULL); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "Target.Worker.Execute")); + cbm_free_result(r); + cbm_arena_destroy(&arena); + PASS(); +} + +#define MULTI_EXPORT \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" + +TEST(iris_export_xml_multi_class) { + CBMArena arena; + cbm_arena_init(&arena); + int count = 0; + char **udl = cbm_iris_export_to_udl(&arena, MULTI_EXPORT, (int)strlen(MULTI_EXPORT), &count); + ASSERT_NOT_NULL(udl); + ASSERT(count == 2); + ASSERT(strstr(udl[0], "Test.First") != NULL || strstr(udl[1], "Test.First") != NULL); + ASSERT(strstr(udl[0], "Test.Second") != NULL || strstr(udl[1], "Test.Second") != NULL); + cbm_arena_destroy(&arena); + PASS(); +} + SUITE(extraction) { /* Initialize extraction library */ cbm_init(); + /* InterSystems ObjectScript (UDL / routine / Export XML). + * NOTE: the *_udl / *_routine / ensemble / self-call / type-inference / + * macro-expand tests require the tree-sitter-objectscript grammar to be + * linked. The grammar is vendored separately; until then these tests will + * fail to link. The pure cbm_iris_export_to_udl transcoder and macro_table + * unit assertions are grammar-free. 
*/ + RUN_TEST(objectscript_udl_class); + RUN_TEST(objectscript_udl_methods_after_goto_label); + RUN_TEST(objectscript_udl_methods); + RUN_TEST(objectscript_udl_base_classes); + RUN_TEST(objectscript_udl_multiple_bases); + RUN_TEST(objectscript_udl_properties); + RUN_TEST(objectscript_routine_tags); + RUN_TEST(objectscript_udl_query_member); + RUN_TEST(objectscript_udl_index_member); + RUN_TEST(objectscript_udl_xdata_member); + RUN_TEST(objectscript_udl_trigger_member); + RUN_TEST(objectscript_udl_trigger_body_quit); + RUN_TEST(objectscript_udl_trigger_body_tokens); + RUN_TEST(objectscript_udl_ensemble_production_def_parses_items); + RUN_TEST(objectscript_udl_ensemble_production_def_hs_settings); + RUN_TEST(objectscript_udl_ensemble_production_def_absent_no_error); + RUN_TEST(objectscript_udl_self_call_relative_dot_method); + RUN_TEST(objectscript_udl_calls_typed_new); + RUN_TEST(objectscript_udl_calls_typed_param); + RUN_TEST(objectscript_udl_calls_typed_property); + RUN_TEST(objectscript_macro_expand_system); + RUN_TEST(objectscript_macro_expand_local); + RUN_TEST(objectscript_macro_constant_no_extra_call); + RUN_TEST(objectscript_udl_method_return_type); + RUN_TEST(objectscript_udl_scalar_return_type_not_resolved); + RUN_TEST(objectscript_data_flows_class_method_args); + RUN_TEST(objectscript_data_flows_instance_method_args); + RUN_TEST(iris_export_xml_simple_class); + RUN_TEST(iris_export_xml_classmethod); + RUN_TEST(iris_export_xml_property_parameter_index); + RUN_TEST(iris_export_xml_calls_extracted); + RUN_TEST(iris_export_xml_multi_class); + /* R box-module imports + member calls */ RUN_TEST(extract_r_box_use_imports_issue218); RUN_TEST(extract_r_dollar_call_issue219);