diff --git a/.gitignore b/.gitignore index 53e3ade..a002eaa 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ models/ site/node_modules site/.docusaurus site/build +.mcp.json +.notes/ diff --git a/package-lock.json b/package-lock.json index d6dd311..17fe3db 100644 --- a/package-lock.json +++ b/package-lock.json @@ -28,6 +28,7 @@ "pino": "^10.3.1", "pino-pretty": "^13.1.3", "redis": "^5.11.0", + "tree-sitter": "^0.21.1", "web-tree-sitter": "^0.26.7", "ws": "^8.19.0", "yaml": "^2.8.2", @@ -50,6 +51,7 @@ "graphology-types": "^0.24.8", "jest": "^30.3.0", "supertest": "^7.2.2", + "tree-sitter-gdscript": "^6.1.0", "ts-jest": "^29.4.6", "tsc-alias": "^1.8.16", "tsx": "^4.21.0", @@ -6413,6 +6415,26 @@ "dev": true, "license": "MIT" }, + "node_modules/node-addon-api": { + "version": "8.7.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.7.0.tgz", + "integrity": "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA==", + "license": "MIT", + "engines": { + "node": "^18 || ^20 || >= 21" + } + }, + "node_modules/node-gyp-build": { + "version": "4.8.4", + "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz", + "integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==", + "license": "MIT", + "bin": { + "node-gyp-build": "bin.js", + "node-gyp-build-optional": "optional.js", + "node-gyp-build-test": "build-test.js" + } + }, "node_modules/node-int64": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", @@ -8007,6 +8029,37 @@ "node": ">=0.6" } }, + "node_modules/tree-sitter": { + "version": "0.21.1", + "resolved": "https://registry.npmjs.org/tree-sitter/-/tree-sitter-0.21.1.tgz", + "integrity": "sha512-7dxoA6kYvtgWw80265MyqJlkRl4yawIjO7S5MigytjELkX43fV2WsAXzsNfO7sBpPPCF5Gp0+XzHk0DwLCq3xQ==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": 
"^8.0.0", + "node-gyp-build": "^4.8.0" + } + }, + "node_modules/tree-sitter-gdscript": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/tree-sitter-gdscript/-/tree-sitter-gdscript-6.1.0.tgz", + "integrity": "sha512-Uy5+GWLkec2JaS1mamiJYsC9j/JoW2FxFq4bnji95gyTtMTsqO6+RSVIaBTU/vRGSNEi3PVDZzyV0j3B4RdntA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.1", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, "node_modules/ts-jest": { "version": "29.4.6", "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-29.4.6.tgz", diff --git a/package.json b/package.json index c3bcd15..de316a2 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,8 @@ "test:watch": "NODE_OPTIONS='--experimental-vm-modules' jest --watch", "site:dev": "cd site && npm start", "site:build": "cd site && npm run build", - "site:serve": "cd site && npm run serve" + "site:serve": "cd site && npm run serve", + "postinstall": "node scripts/postinstall.js" }, "repository": { "type": "git", @@ -44,6 +45,8 @@ }, "files": [ "dist/", + "wasm/", + "scripts/postinstall.js", "README.md" ], "publishConfig": { @@ -69,6 +72,7 @@ "pino": "^10.3.1", "pino-pretty": "^13.1.3", "redis": "^5.11.0", + "tree-sitter": "^0.21.1", "web-tree-sitter": "^0.26.7", "ws": "^8.19.0", "yaml": "^2.8.2", @@ -88,6 +92,7 @@ "graphology-types": "^0.24.8", "jest": "^30.3.0", "supertest": "^7.2.2", + "tree-sitter-gdscript": "^6.1.0", "ts-jest": "^29.4.6", "tsc-alias": "^1.8.16", "tsx": "^4.21.0", diff --git a/scripts/postinstall.js b/scripts/postinstall.js new file mode 100644 index 0000000..af15dbc --- /dev/null +++ b/scripts/postinstall.js @@ -0,0 +1,22 @@ +#!/usr/bin/env node +'use strict'; + +const path = require('path'); +const fs = require('fs'); + +const src = path.join(__dirname, '..', 'wasm', 'tree-sitter-gdscript.wasm'); + +let 
wasmDir; +try { + wasmDir = path.join(path.dirname(require.resolve('@vscode/tree-sitter-wasm/package.json')), 'wasm'); +} catch { + // @vscode/tree-sitter-wasm not installed yet — skip + process.exit(0); +} + +const dest = path.join(wasmDir, 'tree-sitter-gdscript.wasm'); + +if (!fs.existsSync(dest)) { + fs.copyFileSync(src, dest); + console.log('graphmemory: installed tree-sitter-gdscript.wasm'); +} diff --git a/src/graphs/file-lang.ts b/src/graphs/file-lang.ts index 84d003c..651b528 100644 --- a/src/graphs/file-lang.ts +++ b/src/graphs/file-lang.ts @@ -123,6 +123,26 @@ export const EXT_TO_LANGUAGE: Record = { // Zig '.zig': 'zig', + + // Godot + '.gd': 'gdscript', + '.gdshader': 'glsl', + '.gdshaderinc': 'glsl', + '.tscn': 'godot-scene', + '.escn': 'godot-scene', + '.tres': 'godot-resource', + '.godot': 'godot-project', + '.gdextension': 'godot-extension', + + // Shaders + '.glsl': 'glsl', + '.vert': 'glsl', + '.frag': 'glsl', + '.geom': 'glsl', + '.tesc': 'glsl', + '.tese': 'glsl', + '.comp': 'glsl', + '.hlsl': 'glsl', }; /** Look up language from file extension. Returns null if unknown. 
*/ diff --git a/src/lib/multi-config.ts b/src/lib/multi-config.ts index 0d7360c..73c5875 100644 --- a/src/lib/multi-config.ts +++ b/src/lib/multi-config.ts @@ -407,7 +407,7 @@ const SERVER_DEFAULTS: Omit & { embedding: EmbeddingC const PROJECT_DEFAULTS = { docsInclude: '**/*.md', - codeInclude: '**/*.{js,ts,jsx,tsx,mjs,mts,cjs,cts}', + codeInclude: '**/*.{js,ts,jsx,tsx,mjs,mts,cjs,cts,gd,gdshader,gdshaderinc,glsl,tscn,escn,tres,godot,gdextension}', chunkDepth: 4, }; diff --git a/src/lib/parsers/code.ts b/src/lib/parsers/code.ts index 37f6ffe..ece6590 100644 --- a/src/lib/parsers/code.ts +++ b/src/lib/parsers/code.ts @@ -1,7 +1,18 @@ import fs from 'fs'; import path from 'path'; import type { CodeNodeAttributes, CodeEdgeAttributes } from '@/graphs/code-types'; -import { parseSource, getMapper, isLanguageSupported } from '@/lib/parsers/languages'; +import { + parseSource, + getMapper, + getRegexMapper, + isLanguageSupported, + isRegexLanguageSupported, +} from '@/lib/parsers/languages'; +import type { + ExtractedSymbol, + ExtractedEdge, + ExtractedImport, +} from '@/lib/parsers/languages'; import { getLanguage } from '@/graphs/file-lang'; // Strip line and block comments from JSONC, preserving string contents. 
@@ -172,6 +183,18 @@ function resolveAliasImport(specifier: string, fromFile: string, projectDir: str // Main parser // --------------------------------------------------------------------------- +function makeFileOnlyResult(fileId: string, mtime: number): ParsedFile { + return { + fileId, + mtime, + nodes: [{ + id: fileId, + attrs: makeFileAttrs(fileId, '', '', 1, mtime), + }], + edges: [], + }; +} + export async function parseCodeFile( absolutePath: string, codeDir: string, @@ -183,46 +206,48 @@ export async function parseCodeFile( const ext = path.extname(absolutePath); const language = getLanguage(ext); - if (!language || !isLanguageSupported(language)) { - // Unsupported language — return file-only node, no symbols - return { - fileId, - mtime, - nodes: [{ - id: fileId, - attrs: makeFileAttrs(fileId, '', '', 1, mtime), - }], - edges: [], - }; + if (!language) return makeFileOnlyResult(fileId, mtime); + + const treeSitterAvailable = isLanguageSupported(language); + const regexAvailable = !treeSitterAvailable && isRegexLanguageSupported(language); + + if (!treeSitterAvailable && !regexAvailable) { + // Language is detected but no parser available — return file-only node. + return makeFileOnlyResult(fileId, mtime); } const source = fs.readFileSync(absolutePath, 'utf-8'); - const tree = await parseSource(source, language); - - if (!tree) { - return { - fileId, - mtime, - nodes: [{ - id: fileId, - attrs: makeFileAttrs(fileId, '', '', 1, mtime), - }], - edges: [], - }; - } - const rootNode = tree.rootNode; - const mapper = getMapper(language)!; - let symbols, edgeInfos, imports, fileDocComment, importSummary, lastLine; - try { - symbols = mapper.extractSymbols(rootNode); - edgeInfos = mapper.extractEdges(rootNode); - imports = mapper.extractImports(rootNode); - fileDocComment = extractFileDocComment(rootNode); - importSummary = buildImportSummary(rootNode); - lastLine = (rootNode.endPosition?.row ?? 
0) + 1; - } finally { - tree.delete(); + let symbols: ExtractedSymbol[]; + let edgeInfos: ExtractedEdge[]; + let imports: ExtractedImport[]; + let fileDocComment = ''; + let importSummary = ''; + let lastLine: number; + + if (treeSitterAvailable) { + const tree = await parseSource(source, language); + if (!tree) return makeFileOnlyResult(fileId, mtime); + + const rootNode = tree.rootNode; + const mapper = getMapper(language)!; + try { + symbols = mapper.extractSymbols(rootNode); + edgeInfos = mapper.extractEdges(rootNode); + imports = mapper.extractImports(rootNode); + fileDocComment = extractFileDocComment(rootNode); + importSummary = buildImportSummary(rootNode); + lastLine = (rootNode.endPosition?.row ?? 0) + 1; + } finally { + tree.delete(); + } + } else { + // Regex fallback path — operates on raw source text. + const mapper = getRegexMapper(language)!; + symbols = mapper.extractSymbols(source); + edgeInfos = mapper.extractEdges(source); + imports = mapper.extractImports(source); + lastLine = source.split(/\r?\n/).length; } const nodes: ParsedFile['nodes'] = []; diff --git a/src/lib/parsers/languages/bash.ts b/src/lib/parsers/languages/bash.ts new file mode 100644 index 0000000..370cd27 --- /dev/null +++ b/src/lib/parsers/languages/bash.ts @@ -0,0 +1,86 @@ +import type { ExtractedSymbol, ExtractedEdge, ExtractedImport, LanguageMapper } from './types'; +import { registerLanguage } from './registry'; +import { + type TSNode, + truncate, + startLine, + endLine, + sliceBeforeBody, + buildBody, + getPrecedingDoc, +} from './helpers'; + +function getDoc(node: TSNode): string { + return getPrecedingDoc(node, ['comment'], '#'); +} + +function buildSig(node: TSNode): string { + const body = node.childForFieldName('body'); + if (!body) return truncate(node.text ?? ''); + const header = sliceBeforeBody(node, body); + return truncate(header ?? (node.text ?? 
'').split('\n')[0]); +} + +const bashMapper: LanguageMapper = { + extractSymbols(rootNode: TSNode): ExtractedSymbol[] { + const symbols: ExtractedSymbol[] = []; + + function visit(node: TSNode): void { + if (node.type === 'function_definition') { + const nameNode = node.childForFieldName('name'); + const name = nameNode?.text ?? ''; + if (name) { + const doc = getDoc(node); + symbols.push({ + name, + kind: 'function', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: !name.startsWith('_'), + }); + return; // don't recurse into function body + } + } + for (const child of node.children ?? []) visit(child); + } + + visit(rootNode); + return symbols; + }, + + extractEdges(_rootNode: TSNode): ExtractedEdge[] { + return []; + }, + + extractImports(rootNode: TSNode): ExtractedImport[] { + const imports: ExtractedImport[] = []; + + function visit(node: TSNode): void { + // source ./foo or . ./foo + if (node.type === 'command') { + const nameNode = node.childForFieldName('name'); + if (nameNode?.text === 'source' || nameNode?.text === '.') { + const args = node.childForFieldName('argument'); + const arg = args ?? node.namedChildren?.find((c: TSNode) => c.type === 'word'); + if (arg) imports.push({ specifier: arg.text }); + } + } + for (const child of node.children ?? 
[]) visit(child); + } + + visit(rootNode); + return imports; + }, +}; + +let _registered = false; + +export function registerBash(): void { + if (_registered) return; + _registered = true; + registerLanguage('shell', 'tree-sitter-bash.wasm', bashMapper); + registerLanguage('bash', 'tree-sitter-bash.wasm', bashMapper); +} diff --git a/src/lib/parsers/languages/cpp.ts b/src/lib/parsers/languages/cpp.ts new file mode 100644 index 0000000..82f7ca0 --- /dev/null +++ b/src/lib/parsers/languages/cpp.ts @@ -0,0 +1,258 @@ +import type { ExtractedSymbol, ExtractedEdge, ExtractedImport, LanguageMapper } from './types'; +import { registerLanguage } from './registry'; +import { + type TSNode, + truncate, + startLine, + endLine, + sliceBeforeBody, + buildBody, + getPrecedingDoc, +} from './helpers'; + +function getDoc(node: TSNode): string { + return getPrecedingDoc(node, ['comment'], '/**') || + getPrecedingDoc(node, ['comment'], '//'); +} + +function buildSig(node: TSNode): string { + const body = node.childForFieldName('body'); + if (!body) return truncate(node.text ?? ''); + const header = sliceBeforeBody(node, body); + return truncate(header ?? (node.text ?? '').split('\n')[0]); +} + +/** + * Extract the function name from a C/C++ function_definition. + * function_definition.declarator can be: + * function_declarator → declarator (identifier | qualified_identifier | destructor_name | ...) + * pointer_declarator → declarator (function_declarator → ...) + * reference_declarator → ... + */ +function getFunctionName(node: TSNode): string | null { + function walkDeclarator(d: TSNode): string | null { + if (!d) return null; + if (d.type === 'identifier' || d.type === 'field_identifier') return d.text; + if (d.type === 'qualified_identifier') { + // last part of A::B::C + const name = d.childForFieldName('name'); + return name?.text ?? 
d.text; + } + if (d.type === 'destructor_name') return d.text; + if (d.type === 'operator_name') return d.text; + if (d.type === 'function_declarator') { + return walkDeclarator(d.childForFieldName('declarator')); + } + if (d.type === 'pointer_declarator' || d.type === 'reference_declarator' || + d.type === 'abstract_pointer_declarator') { + return walkDeclarator(d.childForFieldName('declarator')); + } + return null; + } + + const decl = node.childForFieldName('declarator'); + return walkDeclarator(decl); +} + +function extractClassMembers(body: TSNode): ExtractedSymbol[] { + const children: ExtractedSymbol[] = []; + if (!body) return children; + + for (const member of body.namedChildren ?? []) { + if (member.type === 'function_definition') { + const name = getFunctionName(member); + if (!name) continue; + const doc = getDoc(member); + children.push({ + name, + kind: 'method', + signature: buildSig(member), + docComment: doc, + body: buildBody(member, doc), + startLine: startLine(member), + endLine: endLine(member), + isExported: false, + }); + } else if (member.type === 'declaration') { + // Field declaration inside class + const decl = member.childForFieldName('declarator'); + const name = decl ? getFunctionName(decl) ?? decl.text : null; + if (!name) continue; + const doc = getDoc(member); + children.push({ + name, + kind: 'variable', + signature: truncate(member.text ?? 
''), + docComment: doc, + body: buildBody(member, doc), + startLine: startLine(member), + endLine: endLine(member), + isExported: false, + }); + } + } + return children; +} + +function processTopLevel(node: TSNode): ExtractedSymbol[] { + switch (node.type) { + case 'function_definition': { + const name = getFunctionName(node); + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'function', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + }]; + } + case 'class_specifier': + case 'struct_specifier': { + const nameNode = node.childForFieldName('name'); + const name = nameNode?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + const body = node.childForFieldName('body'); + const children = extractClassMembers(body); + return [{ + name, + kind: node.type === 'struct_specifier' ? 'type' : 'class', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + children: children.length > 0 ? children : undefined, + }]; + } + case 'namespace_definition': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + const body = node.childForFieldName('body'); + const inner: ExtractedSymbol[] = []; + for (const child of body?.namedChildren ?? []) { + inner.push(...processTopLevel(child)); + } + return [{ + name, + kind: 'interface', + signature: truncate(node.text?.split('\n')[0] ?? ''), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + children: inner.length > 0 ? inner : undefined, + }]; + } + case 'template_declaration': { + // template<...> wraps a function or class — unwrap and process + for (const child of node.namedChildren ?? 
[]) { + const results = processTopLevel(child); + if (results.length > 0) return results; + } + return []; + } + case 'enum_specifier': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'enum', + signature: truncate(node.text?.split('\n')[0] ?? ''), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + }]; + } + default: + return []; + } +} + +const cppMapper: LanguageMapper = { + extractSymbols(rootNode: TSNode): ExtractedSymbol[] { + const symbols: ExtractedSymbol[] = []; + + function visit(node: TSNode): void { + const results = processTopLevel(node); + if (results.length > 0) { + symbols.push(...results); + return; + } + // Recurse into declaration nodes (e.g. typedef struct) + if (node.type === 'declaration' || node.type === 'type_definition') { + for (const child of node.namedChildren ?? []) { + symbols.push(...processTopLevel(child)); + } + return; + } + for (const child of node.children ?? []) visit(child); + } + + visit(rootNode); + return symbols; + }, + + extractEdges(rootNode: TSNode): ExtractedEdge[] { + const edges: ExtractedEdge[] = []; + + function visit(node: TSNode): void { + if (node.type === 'class_specifier') { + const className = node.childForFieldName('name')?.text; + if (className) { + const baseClause = node.childForFieldName('base_class_clause'); + if (baseClause) { + for (const base of baseClause.namedChildren ?? []) { + if (base.type === 'type_identifier' || base.type === 'qualified_identifier') { + edges.push({ fromName: className, toName: base.text, kind: 'extends' }); + } + } + } + } + } + for (const child of node.children ?? 
[]) visit(child); + } + + visit(rootNode); + return edges; + }, + + extractImports(rootNode: TSNode): ExtractedImport[] { + const imports: ExtractedImport[] = []; + + function visit(node: TSNode): void { + if (node.type === 'preproc_include') { + const path = node.childForFieldName('path'); + if (path) { + const specifier = path.text.replace(/^["<]|[">]$/g, ''); + imports.push({ specifier }); + } + } + for (const child of node.children ?? []) visit(child); + } + + visit(rootNode); + return imports; + }, +}; + +let _registered = false; + +export function registerCpp(): void { + if (_registered) return; + _registered = true; + // cpp WASM handles both C and C++ (C++ is a superset of C) + registerLanguage('cpp', 'tree-sitter-cpp.wasm', cppMapper); + registerLanguage('c', 'tree-sitter-cpp.wasm', cppMapper); +} diff --git a/src/lib/parsers/languages/csharp.ts b/src/lib/parsers/languages/csharp.ts new file mode 100644 index 0000000..3b0c764 --- /dev/null +++ b/src/lib/parsers/languages/csharp.ts @@ -0,0 +1,298 @@ +import type { ExtractedSymbol, ExtractedEdge, ExtractedImport, LanguageMapper } from './types'; +import { registerLanguage } from './registry'; +import { + type TSNode, + truncate, + startLine, + endLine, + sliceBeforeBody, + buildBody, + getPrecedingDoc, +} from './helpers'; + +function getDoc(node: TSNode): string { + // C# uses /// single-line doc comments or /** */ block comments + return getPrecedingDoc(node, ['single_line_doc_comment'], '///') || + getPrecedingDoc(node, ['multiline_comment'], '/**') || + getPrecedingDoc(node, ['single_line_comment']); +} + +function buildSig(node: TSNode): string { + const body = node.childForFieldName('body'); + if (!body) return truncate(node.text ?? ''); + const header = sliceBeforeBody(node, body); + return truncate(header ?? (node.text ?? '').split('\n')[0]); +} + +function hasModifier(node: TSNode, mod: string): boolean { + for (const child of node.children ?? 
[]) { + if (child.type === 'modifier' && child.text === mod) return true; + } + return false; +} + +function isPublic(node: TSNode): boolean { + return hasModifier(node, 'public'); +} + +function extractTypeMembers(body: TSNode): ExtractedSymbol[] { + const children: ExtractedSymbol[] = []; + if (!body) return children; + for (const member of body.namedChildren ?? []) { + switch (member.type) { + case 'method_declaration': { + const name = member.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const doc = getDoc(member); + children.push({ + name, + kind: 'method', + signature: buildSig(member), + docComment: doc, + body: buildBody(member, doc), + startLine: startLine(member), + endLine: endLine(member), + isExported: isPublic(member), + }); + break; + } + case 'constructor_declaration': { + const name = member.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const doc = getDoc(member); + children.push({ + name, + kind: 'constructor', + signature: buildSig(member), + docComment: doc, + body: buildBody(member, doc), + startLine: startLine(member), + endLine: endLine(member), + isExported: isPublic(member), + }); + break; + } + case 'field_declaration': { + // field_declaration has variable_declarator children + for (const decl of member.namedChildren ?? []) { + if (decl.type === 'variable_declarator') { + const name = decl.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const doc = getDoc(member); + children.push({ + name, + kind: 'variable', + signature: truncate(member.text ?? ''), + docComment: doc, + body: buildBody(member, doc), + startLine: startLine(member), + endLine: endLine(member), + isExported: isPublic(member), + }); + } + } + break; + } + case 'property_declaration': { + const name = member.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const doc = getDoc(member); + children.push({ + name, + kind: 'variable', + signature: truncate(member.text?.split('\n')[0] ?? 
''), + docComment: doc, + body: buildBody(member, doc), + startLine: startLine(member), + endLine: endLine(member), + isExported: isPublic(member), + }); + break; + } + } + } + return children; +} + +function processDeclaration(node: TSNode): ExtractedSymbol[] { + switch (node.type) { + case 'class_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + const body = node.childForFieldName('body'); + const children = extractTypeMembers(body); + return [{ + name, + kind: 'class', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + children: children.length > 0 ? children : undefined, + }]; + } + case 'interface_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'interface', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + }]; + } + case 'struct_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + const body = node.childForFieldName('body'); + const children = extractTypeMembers(body); + return [{ + name, + kind: 'type', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + children: children.length > 0 ? children : undefined, + }]; + } + case 'enum_declaration': { + const name = node.childForFieldName('name')?.text ?? 
''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'enum', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + }]; + } + case 'method_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'function', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + }]; + } + case 'namespace_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + const body = node.childForFieldName('body'); + // Recurse into namespace body + const inner: ExtractedSymbol[] = []; + for (const child of body?.namedChildren ?? []) { + inner.push(...processDeclaration(child)); + } + return [{ + name, + kind: 'interface', + signature: truncate(node.text?.split('\n')[0] ?? ''), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + children: inner.length > 0 ? inner : undefined, + }]; + } + default: + return []; + } +} + +const csharpMapper: LanguageMapper = { + extractSymbols(rootNode: TSNode): ExtractedSymbol[] { + const symbols: ExtractedSymbol[] = []; + + function visit(node: TSNode): void { + const results = processDeclaration(node); + if (results.length > 0) { + symbols.push(...results); + return; + } + for (const child of node.children ?? 
[]) visit(child); + } + + visit(rootNode); + return symbols; + }, + + extractEdges(rootNode: TSNode): ExtractedEdge[] { + const edges: ExtractedEdge[] = []; + + function visit(node: TSNode): void { + if (node.type === 'class_declaration' || node.type === 'struct_declaration') { + const typeName = node.childForFieldName('name')?.text; + if (typeName) { + const baseList = node.childForFieldName('bases'); + if (baseList) { + for (const base of baseList.namedChildren ?? []) { + const baseName = base.type === 'identifier' ? base.text + : base.childForFieldName('name')?.text ?? base.text; + if (baseName) { + edges.push({ fromName: typeName, toName: baseName, kind: 'extends' }); + } + } + } + } + } + for (const child of node.children ?? []) visit(child); + } + + visit(rootNode); + return edges; + }, + + extractImports(rootNode: TSNode): ExtractedImport[] { + const imports: ExtractedImport[] = []; + + function visit(node: TSNode): void { + if (node.type === 'using_directive') { + // using_directive: using (static)? name ; + for (const child of node.namedChildren ?? []) { + if (child.type === 'identifier' || child.type === 'qualified_name' || + child.type === 'alias_qualified_name') { + imports.push({ specifier: child.text }); + break; + } + } + } + for (const child of node.children ?? 
[]) visit(child); + } + + visit(rootNode); + return imports; + }, +}; + +let _registered = false; + +export function registerCsharp(): void { + if (_registered) return; + _registered = true; + registerLanguage('csharp', 'tree-sitter-c-sharp.wasm', csharpMapper); +} diff --git a/src/lib/parsers/languages/gdscript.ts b/src/lib/parsers/languages/gdscript.ts new file mode 100644 index 0000000..9f95831 --- /dev/null +++ b/src/lib/parsers/languages/gdscript.ts @@ -0,0 +1,307 @@ +import type { ExtractedSymbol, ExtractedEdge, ExtractedImport, LanguageMapper } from './types'; +import { registerLanguage } from './registry'; +import { + type TSNode, + truncate, + startLine, + endLine, + sliceBeforeBody, + buildBody, + getPrecedingDoc, +} from './helpers'; + +function getDoc(node: TSNode): string { + return getPrecedingDoc(node, ['comment'], '#'); +} + +function buildSig(node: TSNode): string { + const body = node.childForFieldName('body'); + if (!body) return truncate(node.text ?? ''); + const header = sliceBeforeBody(node, body); + return truncate(header ?? (node.text ?? '').split('\n')[0]); +} + +/** Extract the base class name from extends_statement. */ +function getExtendsName(node: TSNode): string | null { + const ext = node.childForFieldName('extends'); + if (!ext) return null; + // extends_statement children: type or string + for (const c of ext.namedChildren ?? []) { + if (c.type === 'type' || c.type === 'string' || c.type === 'identifier') { + return c.text.replace(/^["']|["']$/g, ''); + } + } + return ext.text.replace(/^["']|["']$/g, '') || null; +} + +function extractClassMembers(body: TSNode): ExtractedSymbol[] { + const children: ExtractedSymbol[] = []; + if (!body) return children; + + for (const stmt of body.namedChildren ?? []) { + switch (stmt.type) { + case 'function_definition': { + const name = stmt.childForFieldName('name')?.text ?? 
''; + if (!name) continue; + const doc = getDoc(stmt); + children.push({ + name, + kind: 'method', + signature: buildSig(stmt), + docComment: doc, + body: buildBody(stmt, doc), + startLine: startLine(stmt), + endLine: endLine(stmt), + isExported: !name.startsWith('_'), + }); + break; + } + case 'constructor_definition': { + const doc = getDoc(stmt); + children.push({ + name: '_init', + kind: 'constructor', + signature: buildSig(stmt), + docComment: doc, + body: buildBody(stmt, doc), + startLine: startLine(stmt), + endLine: endLine(stmt), + isExported: false, + }); + break; + } + case 'variable_statement': + case 'export_variable_statement': { + const name = stmt.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const doc = getDoc(stmt); + children.push({ + name, + kind: 'variable', + signature: truncate(stmt.text ?? ''), + docComment: doc, + body: buildBody(stmt, doc), + startLine: startLine(stmt), + endLine: endLine(stmt), + isExported: stmt.type === 'export_variable_statement', + }); + break; + } + case 'signal_statement': { + const name = stmt.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const doc = getDoc(stmt); + children.push({ + name, + kind: 'variable', + signature: truncate(stmt.text ?? ''), + docComment: doc, + body: buildBody(stmt, doc), + startLine: startLine(stmt), + endLine: endLine(stmt), + isExported: true, + }); + break; + } + } + } + return children; +} + +function processTopLevel(node: TSNode): ExtractedSymbol[] { + switch (node.type) { + case 'function_definition': { + const name = node.childForFieldName('name')?.text ?? 
''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'function', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: !name.startsWith('_'), + }]; + } + case 'constructor_definition': { + const doc = getDoc(node); + return [{ + name: '_init', + kind: 'function', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: false, + }]; + } + case 'class_definition': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + const body = node.childForFieldName('body'); + const children = extractClassMembers(body); + return [{ + name, + kind: 'class', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + children: children.length > 0 ? children : undefined, + }]; + } + case 'class_name_statement': { + // class_name MyClass [extends Base] — top-level class declaration + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'class', + signature: truncate(node.text ?? ''), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + }]; + } + case 'enum_definition': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'enum', + signature: truncate(node.text?.split('\n')[0] ?? ''), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + }]; + } + case 'signal_statement': { + const name = node.childForFieldName('name')?.text ?? 
''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'variable', + signature: truncate(node.text ?? ''), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + }]; + } + case 'variable_statement': + case 'export_variable_statement': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'variable', + signature: truncate(node.text ?? ''), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: node.type === 'export_variable_statement', + }]; + } + case 'const_statement': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'variable', + signature: truncate(node.text ?? ''), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + }]; + } + default: + return []; + } +} + +const gdscriptMapper: LanguageMapper = { + extractSymbols(rootNode: TSNode): ExtractedSymbol[] { + const symbols: ExtractedSymbol[] = []; + for (const child of rootNode.children ?? []) { + symbols.push(...processTopLevel(child)); + } + return symbols; + }, + + extractEdges(rootNode: TSNode): ExtractedEdge[] { + const edges: ExtractedEdge[] = []; + + // class_name_statement extends Base → file-level inheritance + for (const child of rootNode.children ?? 
[]) { + if (child.type === 'class_name_statement') { + const className = child.childForFieldName('name')?.text; + const baseName = getExtendsName(child); + if (className && baseName) { + edges.push({ fromName: className, toName: baseName, kind: 'extends' }); + } + } + if (child.type === 'class_definition') { + const className = child.childForFieldName('name')?.text; + const baseName = getExtendsName(child); + if (className && baseName) { + edges.push({ fromName: className, toName: baseName, kind: 'extends' }); + } + } + } + + return edges; + }, + + extractImports(rootNode: TSNode): ExtractedImport[] { + const imports: ExtractedImport[] = []; + + function visit(node: TSNode): void { + // preload("res://foo.gd") or load("res://foo.gd") + if (node.type === 'call') { + const fn = node.namedChildren?.[0]; + if (fn?.type === 'identifier' && (fn.text === 'preload' || fn.text === 'load')) { + const args = node.childForFieldName('arguments'); + if (args) { + for (const arg of args.namedChildren ?? []) { + if (arg.type === 'string') { + const specifier = arg.text.replace(/^["']|["']$/g, ''); + imports.push({ specifier }); + } + } + } + } + } + for (const child of node.children ?? 
[]) visit(child); + } + + visit(rootNode); + return imports; + }, +}; + +let _registered = false; + +export function registerGdscript(): void { + if (_registered) return; + _registered = true; + registerLanguage('gdscript', 'tree-sitter-gdscript.wasm', gdscriptMapper); +} diff --git a/src/lib/parsers/languages/go.ts b/src/lib/parsers/languages/go.ts new file mode 100644 index 0000000..89724c9 --- /dev/null +++ b/src/lib/parsers/languages/go.ts @@ -0,0 +1,156 @@ +import type { ExtractedSymbol, ExtractedEdge, ExtractedImport, LanguageMapper } from './types'; +import { registerLanguage } from './registry'; +import { + type TSNode, + truncate, + startLine, + endLine, + sliceBeforeBody, + buildBody, + getPrecedingDoc, +} from './helpers'; + +function getDoc(node: TSNode): string { + return getPrecedingDoc(node, ['comment']); +} + +function buildSig(node: TSNode): string { + const body = node.childForFieldName('body'); + if (!body) return truncate(node.text ?? ''); + const header = sliceBeforeBody(node, body); + return truncate(header ?? (node.text ?? '').split('\n')[0]); +} + +function extractMethods(body: TSNode): ExtractedSymbol[] { + // Go structs don't have methods inside body — methods are top-level with receiver. + // We extract struct fields as children instead. + const children: ExtractedSymbol[] = []; + if (!body) return children; + // field_declaration_list → field_declaration + for (const child of body.namedChildren ?? []) { + if (child.type === 'field_declaration') { + // field has named children: names (field_identifier) and type + for (const n of child.namedChildren ?? []) { + if (n.type === 'field_identifier') { + children.push({ + name: n.text ?? '', + kind: 'variable', + signature: truncate(child.text ?? ''), + docComment: '', + body: child.text ?? 
'', + startLine: startLine(child), + endLine: endLine(child), + isExported: false, + }); + } + } + } + } + return children; +} + +function processTopLevel(node: TSNode): ExtractedSymbol[] { + switch (node.type) { + case 'function_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'function', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: /^[A-Z]/.test(name), + }]; + } + case 'method_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'method', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: /^[A-Z]/.test(name), + }]; + } + case 'type_declaration': { + const symbols: ExtractedSymbol[] = []; + for (const spec of node.namedChildren ?? []) { + if (spec.type !== 'type_spec') continue; + const name = spec.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const typeNode = spec.childForFieldName('type'); + const kind = typeNode?.type === 'struct_type' ? 'class' + : typeNode?.type === 'interface_type' ? 'interface' + : 'type'; + const doc = getDoc(node); + const children = kind === 'class' ? extractMethods(typeNode) : undefined; + symbols.push({ + name, + kind, + signature: truncate(node.text ?? ''), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: /^[A-Z]/.test(name), + children: children && children.length > 0 ? children : undefined, + }); + } + return symbols; + } + default: + return []; + } +} + +const goMapper: LanguageMapper = { + extractSymbols(rootNode: TSNode): ExtractedSymbol[] { + const symbols: ExtractedSymbol[] = []; + for (const child of rootNode.children ?? 
[]) { + symbols.push(...processTopLevel(child)); + } + return symbols; + }, + + extractEdges(_rootNode: TSNode): ExtractedEdge[] { + // Go uses embedding, not traditional inheritance — skip edges + return []; + }, + + extractImports(rootNode: TSNode): ExtractedImport[] { + const imports: ExtractedImport[] = []; + + function collectSpecs(node: TSNode): void { + if (node.type === 'import_spec') { + const path = node.childForFieldName('path'); + if (path) { + const specifier = path.text.replace(/^["'`]|["'`]$/g, ''); + imports.push({ specifier }); + } + } + for (const child of node.namedChildren ?? []) collectSpecs(child); + } + + for (const child of rootNode.children ?? []) { + if (child.type === 'import_declaration') collectSpecs(child); + } + return imports; + }, +}; + +let _registered = false; + +export function registerGo(): void { + if (_registered) return; + _registered = true; + registerLanguage('go', 'tree-sitter-go.wasm', goMapper); +} diff --git a/src/lib/parsers/languages/helpers.ts b/src/lib/parsers/languages/helpers.ts new file mode 100644 index 0000000..c5e225b --- /dev/null +++ b/src/lib/parsers/languages/helpers.ts @@ -0,0 +1,90 @@ +/** + * Shared utilities for tree-sitter language mappers. + */ +import { SIGNATURE_MAX_LEN } from '@/lib/defaults'; + +export type TSNode = any; + +export function truncate(text: string, maxLen = SIGNATURE_MAX_LEN): string { + const collapsed = text.replace(/\s+/g, ' ').trim(); + return collapsed.length > maxLen ? collapsed.slice(0, maxLen) + '…' : collapsed; +} + +export function startLine(node: TSNode): number { + return (node.startPosition?.row ?? 0) + 1; +} + +export function endLine(node: TSNode): number { + return (node.endPosition?.row ?? 0) + 1; +} + +/** + * Slice outerNode.text up to where bodyNode begins (line-based to avoid + * tree-sitter byte-offset vs JS char-offset mismatch). + */ +export function sliceBeforeBody(outerNode: TSNode, bodyNode: TSNode): string | null { + const text = outerNode.text ?? 
''; + const outerStartRow = outerNode.startPosition.row; + const bodyStartRow = bodyNode.startPosition.row; + + if (bodyStartRow > outerStartRow) { + const lines = text.split('\n'); + const relativeRow = bodyStartRow - outerStartRow; + const beforeBody = lines.slice(0, relativeRow); + const bodyLine = lines[relativeRow] ?? ''; + const col = bodyNode.startPosition.column; + if (col > 0) beforeBody.push(bodyLine.slice(0, col)); + return beforeBody.join('\n'); + } + + const col = bodyNode.startPosition.column - outerNode.startPosition.column; + if (col > 0) return text.slice(0, col); + return null; +} + +/** Build signature: everything before body node, or first line fallback. */ +export function buildSignature(node: TSNode, bodyFieldName = 'body'): string { + const bodyNode = node.childForFieldName(bodyFieldName); + const text = node.text ?? ''; + if (!bodyNode) return truncate(text); + const header = sliceBeforeBody(node, bodyNode); + return truncate(header ?? text.split('\n')[0]); +} + +export function buildBody(node: TSNode, docComment: string): string { + if (docComment) return docComment + '\n' + (node.text ?? ''); + return node.text ?? ''; +} + +/** + * Find the nearest preceding doc comment. + * @param nodeTypes set of comment node types to accept (default: ['comment']) + * @param prefix required text prefix (e.g. '/**', '///', '#') + */ +export function getPrecedingDoc( + node: TSNode, + nodeTypes: string[] = ['comment'], + prefix?: string, +): string { + const types = new Set(nodeTypes); + let prev = node.previousNamedSibling; + + if (prefix) { + while (prev && types.has(prev.type) && !prev.text.startsWith(prefix)) { + prev = prev.previousNamedSibling; + } + } + + if (prev && types.has(prev.type) && (!prefix || prev.text.startsWith(prefix))) { + return prev.text.trim(); + } + return ''; +} + +/** Walk named children of a node, calling visitor for each. 
*/ +export function walkChildren(node: TSNode, visitor: (child: TSNode) => void): void { + if (!node) return; + for (const child of node.namedChildren ?? []) { + visitor(child); + } +} diff --git a/src/lib/parsers/languages/index.ts b/src/lib/parsers/languages/index.ts index ac03474..48d4c68 100644 --- a/src/lib/parsers/languages/index.ts +++ b/src/lib/parsers/languages/index.ts @@ -1,7 +1,64 @@ -export { registerLanguage, isLanguageSupported, parseSource, getMapper, listLanguages, initParser } from './registry'; -export type { LanguageMapper, ExtractedSymbol, ExtractedEdge, ExtractedImport } from './types'; +export { + registerLanguage, + registerRegexLanguage, + isLanguageSupported, + isRegexLanguageSupported, + parseSource, + getMapper, + getRegexMapper, + listLanguages, + listRegexLanguages, + initParser, +} from './registry'; +export type { + LanguageMapper, + RegexLanguageMapper, + ExtractedSymbol, + ExtractedEdge, + ExtractedImport, +} from './types'; export { registerTypescript } from './typescript'; +export { registerPython } from './python'; +export { registerGo } from './go'; +export { registerRust } from './rust'; +export { registerJava } from './java'; +export { registerPhp } from './php'; +export { registerRuby } from './ruby'; +export { registerCsharp } from './csharp'; +export { registerCpp } from './cpp'; +export { registerBash } from './bash'; +export { registerGdscript } from './gdscript'; +export { + createRegexMapper, + type RegexMapperOptions, + type RegexSymbolPattern, + type RegexImportPattern, +} from './regex-mapper'; +export { registerRegexLanguages } from './regex-patterns'; // Auto-register built-in languages on import import { registerTypescript } from './typescript'; +import { registerPython } from './python'; +import { registerGo } from './go'; +import { registerRust } from './rust'; +import { registerJava } from './java'; +import { registerPhp } from './php'; +import { registerRuby } from './ruby'; +import { registerCsharp } from 
'./csharp'; +import { registerCpp } from './cpp'; +import { registerBash } from './bash'; +import { registerGdscript } from './gdscript'; +import { registerRegexLanguages } from './regex-patterns'; + registerTypescript(); +registerPython(); +registerGo(); +registerRust(); +registerJava(); +registerPhp(); +registerRuby(); +registerCsharp(); +registerCpp(); +registerBash(); +registerGdscript(); +registerRegexLanguages(); diff --git a/src/lib/parsers/languages/java.ts b/src/lib/parsers/languages/java.ts new file mode 100644 index 0000000..50923b2 --- /dev/null +++ b/src/lib/parsers/languages/java.ts @@ -0,0 +1,234 @@ +import type { ExtractedSymbol, ExtractedEdge, ExtractedImport, LanguageMapper } from './types'; +import { registerLanguage } from './registry'; +import { + type TSNode, + truncate, + startLine, + endLine, + sliceBeforeBody, + buildBody, + getPrecedingDoc, +} from './helpers'; + +function getDoc(node: TSNode): string { + return getPrecedingDoc(node, ['block_comment', 'line_comment'], '/**') || + getPrecedingDoc(node, ['line_comment']); +} + +function buildSig(node: TSNode): string { + const body = node.childForFieldName('body'); + if (!body) return truncate(node.text ?? ''); + const header = sliceBeforeBody(node, body); + return truncate(header ?? (node.text ?? '').split('\n')[0]); +} + +function isPublic(node: TSNode): boolean { + const mods = node.childForFieldName('modifiers'); + if (!mods) return false; + return mods.text?.includes('public') ?? false; +} + +function extractClassMembers(body: TSNode): ExtractedSymbol[] { + const children: ExtractedSymbol[] = []; + if (!body) return children; + for (const member of body.namedChildren ?? []) { + switch (member.type) { + case 'method_declaration': { + const name = member.childForFieldName('name')?.text ?? 
''; + if (!name) continue; + const doc = getDoc(member); + children.push({ + name, + kind: 'method', + signature: buildSig(member), + docComment: doc, + body: buildBody(member, doc), + startLine: startLine(member), + endLine: endLine(member), + isExported: isPublic(member), + }); + break; + } + case 'constructor_declaration': { + const name = member.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const doc = getDoc(member); + children.push({ + name, + kind: 'constructor', + signature: buildSig(member), + docComment: doc, + body: buildBody(member, doc), + startLine: startLine(member), + endLine: endLine(member), + isExported: isPublic(member), + }); + break; + } + case 'field_declaration': { + const decl = member.childForFieldName('declarator'); + const name = decl?.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const doc = getDoc(member); + children.push({ + name, + kind: 'variable', + signature: truncate(member.text ?? ''), + docComment: doc, + body: buildBody(member, doc), + startLine: startLine(member), + endLine: endLine(member), + isExported: isPublic(member), + }); + break; + } + } + } + return children; +} + +function processTopLevel(node: TSNode): ExtractedSymbol[] { + switch (node.type) { + case 'class_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + const body = node.childForFieldName('body'); + const children = extractClassMembers(body); + return [{ + name, + kind: 'class', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + children: children.length > 0 ? children : undefined, + }]; + } + case 'interface_declaration': { + const name = node.childForFieldName('name')?.text ?? 
''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'interface', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + }]; + } + case 'enum_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'enum', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + }]; + } + case 'annotation_type_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'type', + signature: truncate(node.text?.split('\n')[0] ?? ''), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + }]; + } + default: + return []; + } +} + +const javaMapper: LanguageMapper = { + extractSymbols(rootNode: TSNode): ExtractedSymbol[] { + const symbols: ExtractedSymbol[] = []; + + function visit(node: TSNode): void { + const results = processTopLevel(node); + if (results.length > 0) { + symbols.push(...results); + return; // don't recurse into processed nodes (children already extracted) + } + for (const child of node.children ?? []) visit(child); + } + + visit(rootNode); + return symbols; + }, + + extractEdges(rootNode: TSNode): ExtractedEdge[] { + const edges: ExtractedEdge[] = []; + + function visit(node: TSNode): void { + if (node.type === 'class_declaration') { + const className = node.childForFieldName('name')?.text; + if (className) { + const superclass = node.childForFieldName('superclass'); + if (superclass) { + // superclass node contains 'extends' keyword + type_identifier + for (const c of superclass.namedChildren ?? 
[]) { + if (c.type === 'type_identifier') { + edges.push({ fromName: className, toName: c.text, kind: 'extends' }); + } + } + } + const interfaces = node.childForFieldName('interfaces'); + if (interfaces) { + for (const c of interfaces.namedChildren ?? []) { + if (c.type === 'type_identifier') { + edges.push({ fromName: className, toName: c.text, kind: 'implements' }); + } + } + } + } + } + for (const child of node.children ?? []) visit(child); + } + + visit(rootNode); + return edges; + }, + + extractImports(rootNode: TSNode): ExtractedImport[] { + const imports: ExtractedImport[] = []; + for (const child of rootNode.children ?? []) { + if (child.type === 'import_declaration') { + // import_declaration: import (static)? name ; + // name can be scoped_identifier or asterisk + for (const n of child.namedChildren ?? []) { + if (n.type === 'scoped_identifier' || n.type === 'identifier') { + imports.push({ specifier: n.text }); + break; + } + } + } + } + return imports; + }, +}; + +let _registered = false; + +export function registerJava(): void { + if (_registered) return; + _registered = true; + registerLanguage('java', 'tree-sitter-java.wasm', javaMapper); +} diff --git a/src/lib/parsers/languages/php.ts b/src/lib/parsers/languages/php.ts new file mode 100644 index 0000000..37091ba --- /dev/null +++ b/src/lib/parsers/languages/php.ts @@ -0,0 +1,196 @@ +import type { ExtractedSymbol, ExtractedEdge, ExtractedImport, LanguageMapper } from './types'; +import { registerLanguage } from './registry'; +import { + type TSNode, + truncate, + startLine, + endLine, + sliceBeforeBody, + buildBody, + getPrecedingDoc, +} from './helpers'; + +function getDoc(node: TSNode): string { + return getPrecedingDoc(node, ['comment'], '/**') || + getPrecedingDoc(node, ['comment']); +} + +function buildSig(node: TSNode): string { + const body = node.childForFieldName('body'); + if (!body) return truncate(node.text ?? 
''); + const header = sliceBeforeBody(node, body); + return truncate(header ?? (node.text ?? '').split('\n')[0]); +} + +function extractClassMembers(body: TSNode): ExtractedSymbol[] { + const children: ExtractedSymbol[] = []; + if (!body) return children; + for (const member of body.namedChildren ?? []) { + if (member.type === 'method_declaration') { + const name = member.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const doc = getDoc(member); + children.push({ + name, + kind: name === '__construct' ? 'constructor' : 'method', + signature: buildSig(member), + docComment: doc, + body: buildBody(member, doc), + startLine: startLine(member), + endLine: endLine(member), + isExported: false, + }); + } + } + return children; +} + +function processTopLevel(node: TSNode): ExtractedSymbol[] { + switch (node.type) { + case 'function_definition': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'function', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + }]; + } + case 'class_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + const body = node.childForFieldName('body'); + const children = extractClassMembers(body); + return [{ + name, + kind: 'class', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + children: children.length > 0 ? children : undefined, + }]; + } + case 'interface_declaration': { + const name = node.childForFieldName('name')?.text ?? 
''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'interface', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + }]; + } + case 'trait_declaration': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + const body = node.childForFieldName('body'); + const children = extractClassMembers(body); + return [{ + name, + kind: 'class', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + children: children.length > 0 ? children : undefined, + }]; + } + default: + return []; + } +} + +const phpMapper: LanguageMapper = { + extractSymbols(rootNode: TSNode): ExtractedSymbol[] { + const symbols: ExtractedSymbol[] = []; + + function visit(node: TSNode): void { + const results = processTopLevel(node); + if (results.length > 0) { + symbols.push(...results); + return; + } + for (const child of node.children ?? []) visit(child); + } + + visit(rootNode); + return symbols; + }, + + extractEdges(rootNode: TSNode): ExtractedEdge[] { + const edges: ExtractedEdge[] = []; + + function visit(node: TSNode): void { + if (node.type === 'class_declaration') { + const className = node.childForFieldName('name')?.text; + if (className) { + const base = node.childForFieldName('base_clause'); + if (base) { + for (const c of base.namedChildren ?? []) { + if (c.type === 'qualified_name' || c.type === 'name') { + edges.push({ fromName: className, toName: c.text, kind: 'extends' }); + } + } + } + const impl = node.childForFieldName('class_implements'); + if (impl) { + for (const c of impl.namedChildren ?? []) { + if (c.type === 'qualified_name' || c.type === 'name') { + edges.push({ fromName: className, toName: c.text, kind: 'implements' }); + } + } + } + } + } + for (const child of node.children ?? 
[]) visit(child); + } + + visit(rootNode); + return edges; + }, + + extractImports(rootNode: TSNode): ExtractedImport[] { + const imports: ExtractedImport[] = []; + + function visit(node: TSNode): void { + if (node.type === 'namespace_use_declaration') { + for (const clause of node.namedChildren ?? []) { + if (clause.type === 'namespace_use_clause') { + const name = clause.namedChildren?.[0]; + if (name) imports.push({ specifier: name.text }); + } + } + } + for (const child of node.children ?? []) visit(child); + } + + visit(rootNode); + return imports; + }, +}; + +let _registered = false; + +export function registerPhp(): void { + if (_registered) return; + _registered = true; + registerLanguage('php', 'tree-sitter-php.wasm', phpMapper); +} diff --git a/src/lib/parsers/languages/python.ts b/src/lib/parsers/languages/python.ts new file mode 100644 index 0000000..95a7a1c --- /dev/null +++ b/src/lib/parsers/languages/python.ts @@ -0,0 +1,193 @@ +import type { ExtractedSymbol, ExtractedEdge, ExtractedImport, LanguageMapper } from './types'; +import { registerLanguage } from './registry'; +import { + type TSNode, + truncate, + startLine, + endLine, + sliceBeforeBody, + buildBody, + getPrecedingDoc, +} from './helpers'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function getDoc(node: TSNode): string { + return getPrecedingDoc(node, ['comment'], '#'); +} + +/** Extract Python docstring from the first statement in a body block. */ +function getDocstring(body: TSNode): string { + if (!body) return ''; + const first = body.namedChildren?.[0]; + if (first?.type === 'expression_statement') { + const str = first.namedChildren?.[0]; + if (str?.type === 'string') return str.text.trim(); + } + return ''; +} + +function buildSig(node: TSNode): string { + const body = node.childForFieldName('body'); + if (!body) return truncate(node.text ?? 
''); + const header = sliceBeforeBody(node, body); + return truncate(header ?? (node.text ?? '').split('\n')[0]); +} + +// --------------------------------------------------------------------------- +// Symbol extraction +// --------------------------------------------------------------------------- + +function extractFunctionDef(node: TSNode, doc: string): ExtractedSymbol { + const name = node.childForFieldName('name')?.text ?? ''; + const body = node.childForFieldName('body'); + const docComment = doc || getDocstring(body); + return { + name, + kind: 'function', + signature: buildSig(node), + docComment, + body: buildBody(node, docComment), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + }; +} + +function extractClassMethods(classBody: TSNode): ExtractedSymbol[] { + const children: ExtractedSymbol[] = []; + if (!classBody) return children; + for (const stmt of classBody.namedChildren ?? []) { + let defNode = stmt; + if (stmt.type === 'decorated_definition') { + defNode = stmt.childForFieldName('definition') ?? stmt; + } + if (defNode.type === 'function_definition') { + const name = defNode.childForFieldName('name')?.text ?? ''; + if (!name) continue; + const body = defNode.childForFieldName('body'); + const docComment = getDocstring(body); + children.push({ + name, + kind: name === '__init__' ? 'constructor' : 'method', + signature: buildSig(defNode), + docComment, + body: buildBody(defNode, docComment), + startLine: startLine(defNode), + endLine: endLine(defNode), + isExported: false, + }); + } + } + return children; +} + +function extractClassDef(node: TSNode, doc: string): ExtractedSymbol { + const name = node.childForFieldName('name')?.text ?? 
''; + const body = node.childForFieldName('body'); + const docComment = doc || getDocstring(body); + const children = extractClassMethods(body); + return { + name, + kind: 'class', + signature: buildSig(node), + docComment, + body: buildBody(node, docComment), + startLine: startLine(node), + endLine: endLine(node), + isExported: true, + children: children.length > 0 ? children : undefined, + }; +} + +function processTopLevel(node: TSNode): ExtractedSymbol[] { + switch (node.type) { + case 'function_definition': + return [extractFunctionDef(node, getDoc(node))]; + case 'class_definition': + return [extractClassDef(node, getDoc(node))]; + case 'decorated_definition': { + const inner = node.childForFieldName('definition'); + if (!inner) return []; + const doc = getDoc(node); + if (inner.type === 'function_definition') return [extractFunctionDef(inner, doc)]; + if (inner.type === 'class_definition') return [extractClassDef(inner, doc)]; + return []; + } + default: + return []; + } +} + +// --------------------------------------------------------------------------- +// Main mapper +// --------------------------------------------------------------------------- + +const pythonMapper: LanguageMapper = { + extractSymbols(rootNode: TSNode): ExtractedSymbol[] { + const symbols: ExtractedSymbol[] = []; + for (const child of rootNode.children ?? []) { + symbols.push(...processTopLevel(child)); + } + return symbols; + }, + + extractEdges(rootNode: TSNode): ExtractedEdge[] { + const edges: ExtractedEdge[] = []; + + function visit(node: TSNode): void { + if (node.type === 'class_definition') { + const className = node.childForFieldName('name')?.text; + if (className) { + const superclasses = node.childForFieldName('superclasses'); + if (superclasses) { + for (const arg of superclasses.namedChildren ?? []) { + const baseName = arg.type === 'identifier' ? arg.text + : arg.type === 'attribute' ? arg.childForFieldName('attribute')?.text ?? 
arg.text + : null; + if (baseName && baseName !== 'object') { + edges.push({ fromName: className, toName: baseName, kind: 'extends' }); + } + } + } + } + } + for (const child of node.children ?? []) visit(child); + } + + visit(rootNode); + return edges; + }, + + extractImports(rootNode: TSNode): ExtractedImport[] { + const imports: ExtractedImport[] = []; + for (const child of rootNode.children ?? []) { + if (child.type === 'import_statement') { + for (const n of child.namedChildren ?? []) { + const text = n.type === 'dotted_name' ? n.text + : n.type === 'aliased_import' ? n.childForFieldName('name')?.text + : null; + if (text) imports.push({ specifier: text }); + } + } else if (child.type === 'import_from_statement') { + const mod = child.childForFieldName('module_name')?.text; + if (mod) imports.push({ specifier: mod }); + } + } + return imports; + }, +}; + +// --------------------------------------------------------------------------- +// Registration +// --------------------------------------------------------------------------- + +let _registered = false; + +export function registerPython(): void { + if (_registered) return; + _registered = true; + registerLanguage('python', 'tree-sitter-python.wasm', pythonMapper); +} diff --git a/src/lib/parsers/languages/regex-mapper.ts b/src/lib/parsers/languages/regex-mapper.ts new file mode 100644 index 0000000..61f5ed5 --- /dev/null +++ b/src/lib/parsers/languages/regex-mapper.ts @@ -0,0 +1,168 @@ +/** + * Regex-based language parsing — fallback when no tree-sitter grammar is + * available. Operates directly on source text using line-anchored patterns + * to extract function/class/etc. definitions. Less accurate than tree-sitter, + * but works for any text-based language without external grammar dependencies. 
+ */ +import type { CodeNodeKind } from '@/graphs/code-types'; +import type { + ExtractedSymbol, + ExtractedEdge, + ExtractedImport, + RegexLanguageMapper, +} from './types'; + +const SIGNATURE_MAX_LEN = 200; + +/** A regex pattern that recognizes a category of symbol. */ +export interface RegexSymbolPattern { + /** Symbol kind to assign when this pattern matches. */ + kind: CodeNodeKind; + /** Regex with a named group `name`. The mapper auto-applies the `gm` flags. */ + pattern: RegExp; +} + +/** A regex pattern for import/include statements. */ +export interface RegexImportPattern { + /** Regex with a named group `specifier`. The mapper auto-applies the `gm` flags. */ + pattern: RegExp; +} + +export interface RegexMapperOptions { + symbols: RegexSymbolPattern[]; + imports?: RegexImportPattern[]; + /** + * Pattern matching a single doc-comment line (e.g. /^\s*#/ for shell-style, + * /^\s*\/\// for C-style). When set, contiguous comment lines preceding a + * symbol are attached as its docComment. + */ + docCommentLine?: RegExp; +} + +function truncate(text: string, maxLen = SIGNATURE_MAX_LEN): string { + const collapsed = text.replace(/\s+/g, ' ').trim(); + return collapsed.length > maxLen ? collapsed.slice(0, maxLen) + '…' : collapsed; +} + +/** Ensure a regex has each of the required flags. */ +function withFlags(re: RegExp, required: string): RegExp { + let flags = re.flags; + for (const f of required) if (!flags.includes(f)) flags += f; + return new RegExp(re.source, flags); +} + +/** Convert a 0-based byte offset into a 1-based line number. */ +function offsetToLine(source: string, offset: number): number { + let line = 1; + const limit = Math.min(offset, source.length); + for (let i = 0; i < limit; i++) { + if (source.charCodeAt(i) === 10 /* \n */) line++; + } + return line; +} + +/** Walk backward from `beforeLineIdx-1`, collecting contiguous comment lines. 
/**
 * Build a RegexLanguageMapper from a set of line-anchored patterns.
 *
 * Each symbol and import pattern is recompiled with the `g` and `m` flags so
 * it can be scanned across the whole source with `exec`.
 *
 * Symbol positions are reported relative to the line the definition actually
 * starts on. Patterns frequently begin with `^\s*`, and `\s` also matches
 * line terminators, so a raw match may begin on blank lines above the
 * definition. Those leading blank lines are skipped before `startLine`,
 * `signature`, `docComment` and `body` are computed. Indentation on the
 * definition's own line is kept.
 *
 * @param opts - Symbol patterns (named group `name`), optional import
 *   patterns (named group `specifier`), and an optional pattern that
 *   recognises a single doc-comment line.
 * @returns A mapper whose `extractSymbols` result is sorted by start line,
 *   then name, with duplicates of the same name and start line removed;
 *   `extractEdges` always returns an empty array.
 */
export function createRegexMapper(opts: RegexMapperOptions): RegexLanguageMapper {
  const symbolPatterns = opts.symbols.map(p => ({
    ...p,
    pattern: withFlags(p.pattern, 'gm'),
  }));
  const importPatterns = (opts.imports ?? []).map(p => ({
    ...p,
    pattern: withFlags(p.pattern, 'gm'),
  }));
  const docCommentLine = opts.docCommentLine;

  return {
    extractSymbols(source: string): ExtractedSymbol[] {
      const symbols: ExtractedSymbol[] = [];
      const lines = source.split(/\r?\n/);

      for (const { kind, pattern } of symbolPatterns) {
        pattern.lastIndex = 0;
        let m: RegExpExecArray | null;
        while ((m = pattern.exec(source)) !== null) {
          if (m[0].length === 0) {
            // Avoid an infinite loop on patterns that can match the empty string.
            pattern.lastIndex++;
            continue;
          }
          const name = m.groups?.name;
          if (!name) continue;

          // Skip everything up to and including the last newline inside the
          // match's leading whitespace: those are blank lines swallowed by a
          // leading `\s*`, not part of the definition.
          const leadingWs = /^\s*/.exec(m[0])?.[0] ?? '';
          const skip = leadingWs.lastIndexOf('\n') + 1;
          const matched = m[0].slice(skip);

          const startLine = offsetToLine(source, m.index + skip);
          const endLine = startLine + matched.split(/\r?\n/).length - 1;
          const signature = truncate(lines[startLine - 1] ?? matched);
          const docComment = docCommentLine
            ? collectDocComment(lines, startLine - 1, docCommentLine)
            : '';
          symbols.push({
            name,
            kind,
            signature,
            docComment,
            body: matched,
            startLine,
            endLine,
            // Regex parsing has no scope info — assume top-level definitions
            // are the public API.
            isExported: true,
          });
        }
      }

      symbols.sort((a, b) => a.startLine - b.startLine || a.name.localeCompare(b.name));
      return dedupeSymbols(symbols);
    },

    extractEdges(_source: string): ExtractedEdge[] {
      // Inheritance edges (extends/implements) are not extracted via regex.
      // Recovering them robustly across syntaxes (`extends Foo`, `: public Foo`,
      // `<: Foo`, `impl Foo for Bar`, …) is too noisy without an AST.
      return [];
    },

    extractImports(source: string): ExtractedImport[] {
      const out: ExtractedImport[] = [];
      for (const { pattern } of importPatterns) {
        pattern.lastIndex = 0;
        let m: RegExpExecArray | null;
        while ((m = pattern.exec(source)) !== null) {
          if (m[0].length === 0) {
            pattern.lastIndex++;
            continue;
          }
          const specifier = m.groups?.specifier;
          if (specifier) out.push({ specifier });
        }
      }
      return out;
    },
  };
}
*/ +export function registerRegexLanguages(): void { + if (_registered) return; + _registered = true; + + // ---- Kotlin ---- + registerRegexLanguage('kotlin', createRegexMapper({ + docCommentLine: SLASH_LINE, + symbols: [ + { kind: 'function', pattern: /^[ \t]*(?:public\s+|private\s+|internal\s+|protected\s+|inline\s+|suspend\s+|override\s+|open\s+|operator\s+|infix\s+)*fun\s+(?:<[^>]+>\s+)?(?:[\w.]+\.)?(?[A-Za-z_]\w*)/m }, + { kind: 'class', pattern: /^[ \t]*(?:public\s+|private\s+|internal\s+|protected\s+|abstract\s+|open\s+|sealed\s+|data\s+|inner\s+|annotation\s+)*class\s+(?[A-Za-z_]\w*)/m }, + { kind: 'interface', pattern: /^[ \t]*(?:public\s+|private\s+|internal\s+|protected\s+)*interface\s+(?[A-Za-z_]\w*)/m }, + { kind: 'class', pattern: /^[ \t]*(?:public\s+|private\s+|internal\s+)*object\s+(?[A-Za-z_]\w*)/m }, + ], + imports: [ + { pattern: /^\s*import\s+(?[\w.]+)/m }, + ], + })); + + // ---- Swift ---- + registerRegexLanguage('swift', createRegexMapper({ + docCommentLine: SLASH_LINE, + symbols: [ + { kind: 'function', pattern: /^[ \t]*(?:public\s+|private\s+|internal\s+|fileprivate\s+|open\s+|static\s+|final\s+|override\s+|mutating\s+)*func\s+(?[A-Za-z_]\w*)/m }, + { kind: 'class', pattern: /^[ \t]*(?:public\s+|private\s+|internal\s+|open\s+|final\s+)*class\s+(?[A-Za-z_]\w*)/m }, + { kind: 'class', pattern: /^[ \t]*(?:public\s+|private\s+|internal\s+|open\s+)*struct\s+(?[A-Za-z_]\w*)/m }, + { kind: 'interface', pattern: /^[ \t]*(?:public\s+|private\s+|internal\s+|open\s+)*protocol\s+(?[A-Za-z_]\w*)/m }, + { kind: 'enum', pattern: /^[ \t]*(?:public\s+|private\s+|internal\s+|open\s+)*enum\s+(?[A-Za-z_]\w*)/m }, + ], + imports: [ + { pattern: /^\s*import\s+(?[\w.]+)/m }, + ], + })); + + // ---- Lua ---- + registerRegexLanguage('lua', createRegexMapper({ + docCommentLine: DASH_LINE, + symbols: [ + { kind: 'function', pattern: /^[ \t]*(?:local\s+)?function\s+(?:[\w.:]+[.:])?(?[A-Za-z_]\w*)/m }, + { kind: 'function', pattern: /^[ 
\t]*(?:local\s+)?(?[A-Za-z_]\w*)\s*=\s*function/m }, + ], + imports: [ + { pattern: /\brequire\s*\(?\s*['"](?[^'"]+)['"]/m }, + ], + })); + + // ---- GLSL / shader ---- + const glslMapper = createRegexMapper({ + docCommentLine: SLASH_LINE, + symbols: [ + { kind: 'function', pattern: /^[A-Za-z_][\w]*\s+(?[A-Za-z_]\w*)\s*\([^)]*\)\s*\{/m }, + { kind: 'variable', pattern: /^\s*uniform\s+[\w\s]+?(?[A-Za-z_]\w*)\s*[;=]/m }, + { kind: 'type', pattern: /^[ \t]*struct\s+(?[A-Za-z_]\w*)\s*\{/m }, + ], + imports: [ + { pattern: /^\s*#\s*include\s*[<"](?[^>"]+)[>"]/m }, + ], + }); + registerRegexLanguage('glsl', glslMapper); + + // ---- Dart ---- + registerRegexLanguage('dart', createRegexMapper({ + docCommentLine: SLASH_LINE, + symbols: [ + { kind: 'class', pattern: /^[ \t]*(?:abstract\s+)?class\s+(?[A-Za-z_]\w*)/m }, + { kind: 'interface', pattern: /^[ \t]*mixin\s+(?[A-Za-z_]\w*)/m }, + ], + imports: [ + { pattern: /^\s*import\s+['"](?[^'"]+)['"]/m }, + ], + })); + + // ---- SQL ---- + registerRegexLanguage('sql', createRegexMapper({ + docCommentLine: DASH_LINE, + symbols: [ + { kind: 'function', pattern: /^\s*CREATE\s+(?:OR\s+REPLACE\s+)?(?:FUNCTION|PROCEDURE)\s+(?[A-Za-z_][\w.]*)/im }, + { kind: 'type', pattern: /^\s*CREATE\s+(?:OR\s+REPLACE\s+)?(?:TABLE|VIEW|TYPE)\s+(?:IF\s+NOT\s+EXISTS\s+)?(?[A-Za-z_][\w.]*)/im }, + ], + imports: [], + })); + + // ---- Scala ---- + registerRegexLanguage('scala', createRegexMapper({ + docCommentLine: SLASH_LINE, + symbols: [ + { kind: 'function', pattern: /^[ \t]*(?:override\s+|private\s+|protected\s+|public\s+|implicit\s+)*def\s+(?[A-Za-z_]\w*)/m }, + { kind: 'class', pattern: /^[ \t]*(?:abstract\s+|sealed\s+|final\s+|case\s+)*class\s+(?[A-Za-z_]\w*)/m }, + { kind: 'interface', pattern: /^[ \t]*(?:sealed\s+)?trait\s+(?[A-Za-z_]\w*)/m }, + { kind: 'class', pattern: /^[ \t]*(?:case\s+)?object\s+(?[A-Za-z_]\w*)/m }, + ], + imports: [ + { pattern: /^\s*import\s+(?[\w.]+)/m }, + ], + })); + + // ---- Elixir ---- + 
registerRegexLanguage('elixir', createRegexMapper({ + docCommentLine: HASH_LINE, + symbols: [ + { kind: 'function', pattern: /^[ \t]*def(?:p)?\s+(?[A-Za-z_]\w*)/m }, + { kind: 'class', pattern: /^[ \t]*defmodule\s+(?[A-Z][\w.]*)/m }, + ], + imports: [ + { pattern: /^\s*(?:import|alias|use|require)\s+(?[A-Z][\w.]*)/m }, + ], + })); + + // ---- Haskell ---- + registerRegexLanguage('haskell', createRegexMapper({ + docCommentLine: DASH_LINE, + symbols: [ + { kind: 'function', pattern: /^(?[a-z]\w*)\s*::/m }, + { kind: 'class', pattern: /^data\s+(?[A-Z]\w*)/m }, + { kind: 'type', pattern: /^type\s+(?[A-Z]\w*)/m }, + { kind: 'interface', pattern: /^class\s+(?:\([^)]+\)\s+=>\s+)?(?[A-Z]\w*)/m }, + ], + imports: [ + { pattern: /^\s*import\s+(?:qualified\s+)?(?[\w.]+)/m }, + ], + })); + + // ---- Godot Scene (.tscn / .escn) ---- + // Custom mapper: nodes can share names under different parents, so we use + // the full node path (parent/name) as the unique symbol identifier. + registerRegexLanguage('godot-scene', ((): RegexLanguageMapper => { + const NODE_RE = /^\[node name="([^"]+)"(?:[^\]]*? type="([^"]*)")?(?:[^\]]*? parent="([^"]*)")?\]/gm; + const SUB_RE = /^\[sub_resource type="([^"]*)" id="([^"]*)"/gm; + const CONN_RE = /^\[connection signal="([^"]+)" from="([^"]+)" to="([^"]+)" method="([^"]+)"/gm; + const EXT_RE = /^\[ext_resource [^\]]*path="(res:\/\/[^"]+)"/gm; + + function offsetToLine(src: string, idx: number): number { + let n = 1; + for (let i = 0; i < idx && i < src.length; i++) if (src.charCodeAt(i) === 10) n++; + return n; + } + + return { + extractSymbols(source: string) { + const symbols: ExtractedSymbol[] = []; + const seen = new Set(); + + // Nodes — build full path to avoid duplicate names + NODE_RE.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = NODE_RE.exec(source)) !== null) { + const nodeName = m[1]; + const parent = m[3]; // undefined = root, "." 
= direct child of root + + let fullPath: string; + if (parent === undefined) { + fullPath = nodeName; // root node + } else if (parent === '.') { + fullPath = nodeName; + } else { + fullPath = `${parent}/${nodeName}`; + } + + // Disambiguate truly duplicate paths (shouldn't happen in valid tscn) + let key = fullPath; + let n = 1; + while (seen.has(key)) key = `${fullPath}#${++n}`; + seen.add(key); + + const line = offsetToLine(source, m.index); + symbols.push({ + name: key, + kind: parent === undefined ? 'class' : 'variable', + signature: m[0], + docComment: '', + body: m[0], + startLine: line, + endLine: line, + isExported: true, + }); + } + + // Sub-resources + SUB_RE.lastIndex = 0; + while ((m = SUB_RE.exec(source)) !== null) { + const id = m[2]; + const type = m[1]; + const key = `${type}::${id}`; + const line = offsetToLine(source, m.index); + symbols.push({ + name: key, + kind: 'variable', + signature: m[0], + docComment: '', + body: m[0], + startLine: line, + endLine: line, + isExported: false, + }); + } + + // Connections + CONN_RE.lastIndex = 0; + while ((m = CONN_RE.exec(source)) !== null) { + const signal = m[1]; + const from = m[2]; + const method = m[4]; + const key = `${from}.${signal}→${method}`; + const line = offsetToLine(source, m.index); + symbols.push({ + name: key, + kind: 'variable', + signature: m[0], + docComment: '', + body: m[0], + startLine: line, + endLine: line, + isExported: false, + }); + } + + symbols.sort((a, b) => a.startLine - b.startLine); + return symbols; + }, + + extractEdges(_source: string) { return []; }, + + extractImports(source: string) { + const out: ExtractedImport[] = []; + EXT_RE.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = EXT_RE.exec(source)) !== null) out.push({ specifier: m[1] }); + return out; + }, + }; + })()); + + // ---- Godot Resource (.tres) ---- + registerRegexLanguage('godot-resource', createRegexMapper({ + docCommentLine: /^\s*;/, + symbols: [ + // Resource type as the main "class" + { 
kind: 'class', pattern: /^\[gd_resource type="(?[^"]+)"/m }, + // Embedded sub-resources + { kind: 'variable', pattern: /^\[sub_resource type="[^"]*" id="(?[^"]+)"/m }, + ], + imports: [ + { pattern: /^\[ext_resource [^\]]*path="(?res:\/\/[^"]+)"/m }, + ], + })); + + // ---- Godot Project (project.godot) ---- + registerRegexLanguage('godot-project', createRegexMapper({ + docCommentLine: /^\s*;/, + symbols: [ + // Config sections like [application], [rendering/environment/defaults] + { kind: 'variable', pattern: /^\[(?[a-z][a-z0-9_/]*)\]/m }, + ], + imports: [ + // Main scene and other res:// references + { pattern: /=\s*"(?res:\/\/[^"]+)"/m }, + ], + })); + + // ---- Godot Extension (.gdextension) ---- + registerRegexLanguage('godot-extension', createRegexMapper({ + docCommentLine: /^\s*;/, + symbols: [ + // INI sections + { kind: 'variable', pattern: /^\[(?[a-z][a-z0-9_.]*)\]/m }, + ], + imports: [ + { pattern: /=\s*"(?res:\/\/[^"]+)"/m }, + ], + })); +} diff --git a/src/lib/parsers/languages/registry.ts b/src/lib/parsers/languages/registry.ts index 1c0b430..0ec7c0c 100644 --- a/src/lib/parsers/languages/registry.ts +++ b/src/lib/parsers/languages/registry.ts @@ -1,7 +1,13 @@ import path from 'path'; -import type { LanguageMapper } from './types'; +import type { LanguageMapper, RegexLanguageMapper } from './types'; -export { type LanguageMapper, type ExtractedSymbol, type ExtractedEdge, type ExtractedImport } from './types'; +export { + type LanguageMapper, + type RegexLanguageMapper, + type ExtractedSymbol, + type ExtractedEdge, + type ExtractedImport, +} from './types'; // web-tree-sitter types (loaded lazily) type WTSLanguage = any; @@ -18,6 +24,9 @@ interface LanguageEntry { /** Map from language name (matching file-lang.ts names) to entry. */ const languages = new Map(); +/** Map from language name to a regex-based fallback mapper. 
*/ +const regexLanguages = new Map(); + /** WASM directory containing grammar .wasm files */ const WASM_DIR = path.join( path.dirname(require.resolve('@vscode/tree-sitter-wasm/package.json')), @@ -42,11 +51,16 @@ export async function initParser(): Promise { return _initPromise; } -/** Register a language (sync — only stores metadata). */ +/** Register a tree-sitter language (sync — only stores metadata). */ export function registerLanguage(name: string, wasmFile: string, mapper: LanguageMapper): void { languages.set(name, { wasmFile, language: null, mapper }); } +/** Register a regex-based fallback mapper for a language without tree-sitter support. */ +export function registerRegexLanguage(name: string, mapper: RegexLanguageMapper): void { + regexLanguages.set(name, mapper); +} + /** Load a language WASM if not already loaded. */ async function loadLanguage(entry: LanguageEntry): Promise { if (entry.language) return entry.language; @@ -56,11 +70,16 @@ async function loadLanguage(entry: LanguageEntry): Promise { return entry.language; } -/** Check if a language is registered. */ +/** Check if a tree-sitter language is registered. */ export function isLanguageSupported(languageName: string): boolean { return languages.has(languageName); } +/** Check if a regex-fallback mapper is registered for a language. */ +export function isRegexLanguageSupported(languageName: string): boolean { + return regexLanguages.has(languageName); +} + /** Reusable parser per language (avoids WASM memory leak from creating Parser on every call). */ const parsers = new Map(); @@ -81,12 +100,22 @@ export async function parseSource(code: string, languageName: string): Promise 0 ? children : undefined, + }]; + } + case 'module': { + const name = node.childForFieldName('name')?.text ?? 
/**
 * Tree-sitter mapper for Ruby.
 *
 * - `extractSymbols` walks the tree depth-first in document order and stops
 *   descending as soon as `processTopLevel` yields symbols for a node.
 * - `extractEdges` emits one `extends` edge per `class` node that has both a
 *   name and a `superclass` field, using the first `constant` or
 *   `scope_resolution` child of the superclass node as the base.
 * - `extractImports` records every string argument of `require` and
 *   `require_relative` calls, with one surrounding quote stripped per side.
 */
const rubyMapper: LanguageMapper = {
  extractSymbols(rootNode: TSNode): ExtractedSymbol[] {
    const collected: ExtractedSymbol[] = [];

    const walk = (current: TSNode): void => {
      const found = processTopLevel(current);
      if (found.length === 0) {
        // Nothing recognised here — keep looking further down the tree.
        for (const kid of current.children ?? []) walk(kid);
        return;
      }
      collected.push(...found);
    };

    walk(rootNode);
    return collected;
  },

  extractEdges(rootNode: TSNode): ExtractedEdge[] {
    const edges: ExtractedEdge[] = [];

    const walk = (current: TSNode): void => {
      if (current.type === 'class') {
        const owner = current.childForFieldName('name')?.text;
        const superNode = current.childForFieldName('superclass');
        if (owner && superNode) {
          // The superclass node wraps `< Base`; take its first constant-like child.
          const base = (superNode.namedChildren ?? []).find(
            (c: TSNode) => c.type === 'constant' || c.type === 'scope_resolution',
          );
          if (base) {
            edges.push({ fromName: owner, toName: base.text, kind: 'extends' });
          }
        }
      }
      for (const kid of current.children ?? []) walk(kid);
    };

    walk(rootNode);
    return edges;
  },

  extractImports(rootNode: TSNode): ExtractedImport[] {
    const imports: ExtractedImport[] = [];

    const walk = (current: TSNode): void => {
      // `require 'foo'` and `require_relative 'foo'` both parse as call nodes.
      if (current.type === 'call') {
        const callee = current.childForFieldName('method')?.text;
        if (callee === 'require' || callee === 'require_relative') {
          const argList = current.childForFieldName('arguments');
          for (const arg of argList?.namedChildren ?? []) {
            if (arg.type !== 'string') continue;
            imports.push({ specifier: arg.text.replace(/^['"]|['"]$/g, '') });
          }
        }
      }
      for (const kid of current.children ?? []) walk(kid);
    };

    walk(rootNode);
    return imports;
  },
};
''; + if (!name) continue; + const doc = getDoc(item); + children.push({ + name, + kind: 'method', + signature: buildSig(item), + docComment: doc, + body: buildBody(item, doc), + startLine: startLine(item), + endLine: endLine(item), + isExported: isPublic(item), + }); + } + } + return children; +} + +function processTopLevel(node: TSNode): ExtractedSymbol[] { + switch (node.type) { + case 'function_item': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'function', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + }]; + } + case 'struct_item': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'class', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + }]; + } + case 'enum_item': { + const name = node.childForFieldName('name')?.text ?? ''; + if (!name) return []; + const doc = getDoc(node); + return [{ + name, + kind: 'enum', + signature: buildSig(node), + docComment: doc, + body: buildBody(node, doc), + startLine: startLine(node), + endLine: endLine(node), + isExported: isPublic(node), + }]; + } + case 'trait_item': { + const name = node.childForFieldName('name')?.text ?? 
/**
 * Return the root path segment of every path named by the textual form of a
 * Rust `use` tree.
 *
 * Handles a leading `::` (`::serde::Serialize` → `serde`), a single-segment
 * rename (`tokio as rt` → `tokio`) and a top-level brace group
 * (`{a::x, b::{y, z}}` → `a`, `b`). Any character outside `[A-Za-z0-9_]` is
 * stripped from a segment, and empty results are dropped.
 */
function useTreeRoots(tree: string): string[] {
  const text = tree.trim().replace(/^::\s*/, '');

  if (text.startsWith('{')) {
    const close = text.lastIndexOf('}');
    const inner = text.slice(1, close === -1 ? text.length : close);
    const roots: string[] = [];
    let depth = 0;
    let itemStart = 0;
    // Split on commas that are not nested inside an inner `{ … }` group.
    for (let i = 0; i <= inner.length; i++) {
      const ch = inner[i];
      if (ch === '{') depth++;
      else if (ch === '}') depth--;
      if (i === inner.length || (ch === ',' && depth === 0)) {
        roots.push(...useTreeRoots(inner.slice(itemStart, i)));
        itemStart = i + 1;
      }
    }
    return roots;
  }

  const head = text.split('::')[0].split(/\s+as\s+/)[0];
  const root = head.replace(/[^A-Za-z0-9_]/g, '');
  return root ? [root] : [];
}

/**
 * Tree-sitter mapper for Rust.
 *
 * - `extractSymbols` runs `processTopLevel` on each direct child of the root.
 * - `extractEdges` emits an `implements` edge (type → trait) for every
 *   `impl_item` anywhere in the tree that has both a `type` and a `trait` field.
 * - `extractImports` reports the root segment(s) of every `use_declaration`
 *   anywhere in the tree, one entry per declaration root, without
 *   de-duplication.
 */
const rustMapper: LanguageMapper = {
  extractSymbols(rootNode: TSNode): ExtractedSymbol[] {
    const symbols: ExtractedSymbol[] = [];
    for (const child of rootNode.children ?? []) {
      symbols.push(...processTopLevel(child));
    }
    return symbols;
  },

  extractEdges(rootNode: TSNode): ExtractedEdge[] {
    const edges: ExtractedEdge[] = [];

    function visit(node: TSNode): void {
      if (node.type === 'impl_item') {
        const typeNode = node.childForFieldName('type');
        const traitNode = node.childForFieldName('trait');
        // Inherent impls (`impl Type { … }`) have no trait and produce no edge.
        if (typeNode && traitNode) {
          edges.push({
            fromName: typeNode.text ?? '',
            toName: traitNode.text ?? '',
            kind: 'implements',
          });
        }
      }
      for (const child of node.children ?? []) visit(child);
    }

    visit(rootNode);
    return edges;
  },

  extractImports(rootNode: TSNode): ExtractedImport[] {
    const imports: ExtractedImport[] = [];

    function collectUse(node: TSNode): void {
      if (node.type === 'use_declaration') {
        const arg = node.childForFieldName('argument');
        if (arg) {
          // Only the root crate/module of each path is recorded.
          for (const root of useTreeRoots(arg.text ?? '')) {
            imports.push({ specifier: root });
          }
        }
      }
      for (const child of node.children ?? []) collectUse(child);
    }

    collectUse(rootNode);
    return imports;
  },
};
*/ extractSymbols(rootNode: any): ExtractedSymbol[]; @@ -36,3 +36,15 @@ export interface LanguageMapper { /** Extract relative import specifiers from a tree-sitter root node. */ extractImports(rootNode: any): ExtractedImport[]; } + +/** + * Regex-based language mapper. Operates directly on raw source text — used as + * a fallback for languages without a tree-sitter grammar. Less accurate than + * `LanguageMapper`, but works for any text-based language without bundling + * additional WASM grammars. + */ +export interface RegexLanguageMapper { + extractSymbols(source: string): ExtractedSymbol[]; + extractEdges(source: string): ExtractedEdge[]; + extractImports(source: string): ExtractedImport[]; +} diff --git a/src/tests/code-parser-advanced.test.ts b/src/tests/code-parser-advanced.test.ts index e387878..156964e 100644 --- a/src/tests/code-parser-advanced.test.ts +++ b/src/tests/code-parser-advanced.test.ts @@ -243,7 +243,7 @@ describe('unsupported language file', () => { }); it('returns file-only node for unknown extension', async () => { - const tmpFile = path.join(FIXTURES, '_test.py'); + const tmpFile = path.join(FIXTURES, '_test.xyz'); fs.writeFileSync(tmpFile, 'def foo(): pass\n'); try { const pf = await parseCodeFile(tmpFile, FIXTURES, 1000); diff --git a/src/tests/multi-config.test.ts b/src/tests/multi-config.test.ts index 7709894..29f9028 100644 --- a/src/tests/multi-config.test.ts +++ b/src/tests/multi-config.test.ts @@ -23,7 +23,7 @@ projects: expect(p.projectDir).toBe('/tmp/my-app'); expect(p.graphMemory).toBe('/tmp/my-app/.graph-memory'); expect(p.graphConfigs.docs.include).toBe('**/*.md'); - expect(p.graphConfigs.code.include).toBe('**/*.{js,ts,jsx,tsx,mjs,mts,cjs,cts}'); + expect(p.graphConfigs.code.include).toBe('**/*.{js,ts,jsx,tsx,mjs,mts,cjs,cts,gd,gdshader,gdshaderinc,glsl,tscn,escn,tres,godot,gdextension}'); expect(p.exclude).toContain('**/node_modules/**'); expect(p.chunkDepth).toBe(4); expect(p.embedding.maxChars).toBe(24000); diff --git 
a/src/tests/regex-parser.test.ts b/src/tests/regex-parser.test.ts new file mode 100644 index 0000000..71f2f30 --- /dev/null +++ b/src/tests/regex-parser.test.ts @@ -0,0 +1,120 @@ +import { createRegexMapper } from '@/lib/parsers/languages/regex-mapper'; +import { + registerRegexLanguages, + getRegexMapper, + isRegexLanguageSupported, +} from '@/lib/parsers/languages'; + +beforeAll(() => { + registerRegexLanguages(); +}); + +describe('createRegexMapper', () => { + const mapper = createRegexMapper({ + docCommentLine: /^\s*#/, + symbols: [ + { kind: 'function', pattern: /^def\s+(?\w+)\s*\(/m }, + { kind: 'class', pattern: /^class\s+(?\w+)/m }, + ], + imports: [ + { pattern: /^import\s+(?\S+)/m }, + ], + }); + + it('extracts function name', () => { + const out = mapper.extractSymbols('def hello(x):\n pass\n'); + expect(out.map(s => s.name)).toEqual(['hello']); + expect(out[0].kind).toBe('function'); + expect(out[0].startLine).toBe(1); + }); + + it('extracts class name', () => { + const out = mapper.extractSymbols('class Foo:\n pass\n'); + expect(out.map(s => s.name)).toEqual(['Foo']); + expect(out[0].kind).toBe('class'); + }); + + it('extracts both functions and classes from same source', () => { + const src = 'class Foo:\n pass\ndef bar():\n pass\n'; + const out = mapper.extractSymbols(src); + expect(out).toHaveLength(2); + expect(out.map(s => s.name).sort()).toEqual(['Foo', 'bar']); + }); + + it('attaches preceding comment lines as docComment', () => { + const src = '# helper for things\n# does X then Y\ndef hello():\n pass\n'; + const out = mapper.extractSymbols(src); + expect(out[0].docComment).toContain('helper for things'); + expect(out[0].docComment).toContain('does X then Y'); + }); + + it('extracts imports', () => { + const out = mapper.extractImports('import os\nimport sys.path\n'); + expect(out.map(i => i.specifier)).toEqual(['os', 'sys.path']); + }); + + it('returns empty edges array (regex parsing has no AST)', () => { + expect(mapper.extractEdges('class 
Foo extends Bar {}')).toEqual([]); + }); + + it('reports correct line numbers for multi-line source', () => { + const src = '\n\n\ndef hello():\n pass\n'; + const out = mapper.extractSymbols(src); + expect(out[0].startLine).toBe(4); + }); + + it('handles empty source', () => { + expect(mapper.extractSymbols('')).toEqual([]); + expect(mapper.extractImports('')).toEqual([]); + }); + + it('skips matches without a "name" group', () => { + const noNameMapper = createRegexMapper({ + symbols: [{ kind: 'function', pattern: /^def\s+\w+/m }], + }); + expect(noNameMapper.extractSymbols('def foo():\n')).toEqual([]); + }); + + it('marks symbols as exported (regex has no scope info)', () => { + const out = mapper.extractSymbols('def hello():\n pass\n'); + expect(out[0].isExported).toBe(true); + }); +}); + +describe('built-in regex languages', () => { + it('glsl is registered', () => { + expect(isRegexLanguageSupported('glsl')).toBe(true); + }); + + it('python is NOT regex-registered (handled by tree-sitter)', () => { + expect(isRegexLanguageSupported('python')).toBe(false); + }); + + it('go is NOT regex-registered (handled by tree-sitter)', () => { + expect(isRegexLanguageSupported('go')).toBe(false); + }); + + it('rust is NOT regex-registered (handled by tree-sitter)', () => { + expect(isRegexLanguageSupported('rust')).toBe(false); + }); + + it('gdscript is NOT regex-registered (handled by tree-sitter)', () => { + expect(isRegexLanguageSupported('gdscript')).toBe(false); + }); + + it('typescript is NOT regex-registered (handled by tree-sitter)', () => { + expect(isRegexLanguageSupported('typescript')).toBe(false); + }); + + describe('glsl mapper', () => { + const glsl = getRegexMapper('glsl')!; + it('extracts uniform', () => { + const out = glsl.extractSymbols('uniform float bass_impact;\n'); + expect(out.map(s => s.name)).toContain('bass_impact'); + }); + it('extracts function', () => { + const out = glsl.extractSymbols('vec3 fade(vec3 c, float t) {\n return c * t;\n}\n'); + 
expect(out.map(s => s.name)).toContain('fade'); + }); + }); +}); diff --git a/src/tests/tree-sitter-languages.test.ts b/src/tests/tree-sitter-languages.test.ts new file mode 100644 index 0000000..21274ca --- /dev/null +++ b/src/tests/tree-sitter-languages.test.ts @@ -0,0 +1,564 @@ +import path from 'path'; +import fs from 'fs'; +import { parseCodeFile } from '@/lib/parsers/code'; +import type { ParsedFile } from '@/lib/parsers/code'; + +const FIXTURES = path.join(__dirname, 'fixtures', 'code'); +const MTIME = 1000; + +function names(pf: ParsedFile): string[] { + return pf.nodes.filter(n => n.attrs.kind !== 'file').map(n => n.attrs.name); +} + +function node(pf: ParsedFile, name: string) { + return pf.nodes.find(n => n.attrs.name === name); +} + +async function parse(ext: string, src: string): Promise { + const tmpFile = path.join(FIXTURES, `_lang_test${ext}`); + fs.writeFileSync(tmpFile, src); + try { + return await parseCodeFile(tmpFile, FIXTURES, MTIME); + } finally { + fs.unlinkSync(tmpFile); + } +} + +// --------------------------------------------------------------------------- +// Python +// --------------------------------------------------------------------------- + +describe('python tree-sitter mapper', () => { + const src = ` +def greet(name: str) -> str: + """Say hello.""" + return f"hello {name}" + +class Animal: + """Base animal.""" + def __init__(self, name: str): + self.name = name + + def speak(self) -> str: + return "" + +class Dog(Animal): + def speak(self) -> str: + return "woof" +`.trimStart(); + + let pf: ParsedFile; + beforeAll(async () => { pf = await parse('.py', src); }); + + it('extracts top-level function', () => { expect(names(pf)).toContain('greet'); }); + it('function kind=function', () => { expect(node(pf, 'greet')?.attrs.kind).toBe('function'); }); + it('function has docComment', () => { expect(node(pf, 'greet')?.attrs.docComment).toContain('Say hello'); }); + it('extracts class', () => { expect(names(pf)).toContain('Animal'); }); + 
it('class kind=class', () => { expect(node(pf, 'Animal')?.attrs.kind).toBe('class'); }); + it('extracts subclass', () => { expect(names(pf)).toContain('Dog'); }); + it('extracts __init__ as constructor', () => { expect(names(pf)).toContain('__init__'); }); + it('constructor kind=constructor', () => { expect(node(pf, '__init__')?.attrs.kind).toBe('constructor'); }); + it('extracts method', () => { expect(names(pf)).toContain('speak'); }); + it('extends edge Dog→Animal', () => { + expect(pf.edges.some(e => e.attrs.kind === 'extends' && e.from.includes('Dog') && e.to.includes('Animal'))).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// Go +// --------------------------------------------------------------------------- + +describe('go tree-sitter mapper', () => { + const src = `package main + +type Server struct { +\tHost string +\tPort int +} + +type Runner interface { +\tRun() error +} + +func NewServer(host string) *Server { +\treturn &Server{Host: host} +} + +func (s *Server) Start() error { +\treturn nil +} +`; + + let pf: ParsedFile; + beforeAll(async () => { pf = await parse('.go', src); }); + + it('extracts function', () => { expect(names(pf)).toContain('NewServer'); }); + it('function kind=function', () => { expect(node(pf, 'NewServer')?.attrs.kind).toBe('function'); }); + it('exported func isExported=true', () => { expect(node(pf, 'NewServer')?.attrs.isExported).toBe(true); }); + it('extracts method with receiver', () => { expect(names(pf)).toContain('Start'); }); + it('method kind=method', () => { expect(node(pf, 'Start')?.attrs.kind).toBe('method'); }); + it('extracts struct type as class', () => { expect(node(pf, 'Server')?.attrs.kind).toBe('class'); }); + it('extracts interface type', () => { expect(node(pf, 'Runner')?.attrs.kind).toBe('interface'); }); + it('exported struct isExported=true', () => { expect(node(pf, 'Server')?.attrs.isExported).toBe(true); }); +}); + +// 
---------------------------------------------------------------------------
+// Rust
+// ---------------------------------------------------------------------------
+
+describe('rust tree-sitter mapper', () => {
+  const src = `/// A network engine.
+pub struct Engine {
+    pub host: String,
+}
+
+pub trait Runnable {
+    fn run(&self) -> bool;
+}
+
+pub fn launch(host: &str) -> Engine {
+    Engine { host: host.to_string() }
+}
+
+impl Engine {
+    pub fn stop(&self) {}
+}
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.rs', src); });
+
+  it('extracts struct', () => { expect(names(pf)).toContain('Engine'); });
+  it('struct kind=class', () => { expect(node(pf, 'Engine')?.attrs.kind).toBe('class'); });
+  it('doc comment on struct', () => { expect(node(pf, 'Engine')?.attrs.docComment).toContain('network engine'); });
+  it('extracts trait', () => { expect(names(pf)).toContain('Runnable'); });
+  it('trait kind=interface', () => { expect(node(pf, 'Runnable')?.attrs.kind).toBe('interface'); });
+  it('extracts function', () => { expect(names(pf)).toContain('launch'); });
+  it('function kind=function', () => { expect(node(pf, 'launch')?.attrs.kind).toBe('function'); });
+  it('extracts impl method', () => { expect(names(pf)).toContain('stop'); });
+  it('impl method kind=method', () => { expect(node(pf, 'stop')?.attrs.kind).toBe('method'); });
+});
+
+// ---------------------------------------------------------------------------
+// Java
+// ---------------------------------------------------------------------------
+
+describe('java tree-sitter mapper', () => {
+  const src = `/**
+ * Base service.
+ */
+public abstract class BaseService {
+    protected String name;
+
+    public BaseService(String name) {
+        this.name = name;
+    }
+
+    public abstract void start();
+}
+
+public interface Lifecycle {
+    void start();
+    void stop();
+}
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.java', src); });
+
+  it('extracts class', () => { expect(names(pf)).toContain('BaseService'); });
+  it('class kind=class', () => { expect(node(pf, 'BaseService')?.attrs.kind).toBe('class'); });
+  it('class docComment', () => { expect(node(pf, 'BaseService')?.attrs.docComment).toContain('Base service'); });
+  it('extracts interface', () => { expect(names(pf)).toContain('Lifecycle'); });
+  it('interface kind=interface', () => { expect(node(pf, 'Lifecycle')?.attrs.kind).toBe('interface'); });
+  it('extracts constructor', () => { expect(names(pf)).toContain('BaseService'); }); // NOTE(review): same assertion as 'extracts class'; it cannot tell the constructor node from the class node — confirm how the mapper names constructors and tighten
+  it('extracts method', () => { expect(names(pf)).toContain('start'); });
+});
+
+// ---------------------------------------------------------------------------
+// PHP
+// ---------------------------------------------------------------------------
+
+describe('php tree-sitter mapper', () => {
+  const src = `<?php
+
+function greet(string $name): string {
+    return "Hello, " . $name;
+}
+
+class User {
+    private string $name;
+
+    public function __construct(string $name) {
+        $this->name = $name;
+    }
+
+    public function getName(): string {
+        return $this->name;
+    }
+}
+
+interface Repository {
+    public function find(int $id): mixed;
+}
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.php', src); });
+
+  it('extracts function', () => { expect(names(pf)).toContain('greet'); });
+  it('function kind=function', () => { expect(node(pf, 'greet')?.attrs.kind).toBe('function'); });
+  it('extracts class', () => { expect(names(pf)).toContain('User'); });
+  it('class kind=class', () => { expect(node(pf, 'User')?.attrs.kind).toBe('class'); });
+  it('extracts interface', () => { expect(names(pf)).toContain('Repository'); });
+  it('interface kind=interface', () => { expect(node(pf, 'Repository')?.attrs.kind).toBe('interface'); });
+  it('extracts method', () => { expect(names(pf)).toContain('getName'); });
+  it('extracts constructor', () => { expect(names(pf)).toContain('__construct'); });
+});
+
+// ---------------------------------------------------------------------------
+// Ruby
+// ---------------------------------------------------------------------------
+
+describe('ruby tree-sitter mapper', () => {
+  const src = `class Dog
+  def initialize(name)
+    @name = name
+  end
+
+  def speak
+    "woof"
+  end
+end
+
+module Utilities
+end
+
+def top_level_helper
+  42
+end
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.rb', src); });
+
+  it('extracts class', () => { expect(names(pf)).toContain('Dog'); });
+  it('class kind=class', () => { expect(node(pf, 'Dog')?.attrs.kind).toBe('class'); });
+  it('extracts initialize as constructor', () => { expect(names(pf)).toContain('initialize'); });
+  it('constructor kind=constructor', () => { expect(node(pf, 'initialize')?.attrs.kind).toBe('constructor'); });
+  it('extracts instance method', () => { expect(names(pf)).toContain('speak'); });
+  it('instance method kind=method', () => { expect(node(pf, 'speak')?.attrs.kind).toBe('method'); });
+  it('extracts module', () => { expect(names(pf)).toContain('Utilities'); });
+  it('module kind=interface', () => { expect(node(pf, 'Utilities')?.attrs.kind).toBe('interface'); });
+  it('extracts top-level method', () => { expect(names(pf)).toContain('top_level_helper'); });
+  it('top-level method kind=function', () => { expect(node(pf, 'top_level_helper')?.attrs.kind).toBe('function'); });
+});
+
+// ---------------------------------------------------------------------------
+// C# (csharp)
+// ---------------------------------------------------------------------------
+
+describe('csharp tree-sitter mapper', () => {
+  const src = `/// Base service class.
+public abstract class BaseService {
+    public BaseService() {}
+
+    /// Start the service.
+    public abstract void Start();
+}
+
+public interface ILifecycle {
+    void Start();
+}
+
+public struct Point {
+    public int X;
+}
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.cs', src); });
+
+  it('extracts class', () => { expect(names(pf)).toContain('BaseService'); });
+  it('class kind=class', () => { expect(node(pf, 'BaseService')?.attrs.kind).toBe('class'); });
+  it('extracts interface', () => { expect(names(pf)).toContain('ILifecycle'); });
+  it('interface kind=interface', () => { expect(node(pf, 'ILifecycle')?.attrs.kind).toBe('interface'); });
+  it('extracts struct as type', () => { expect(node(pf, 'Point')?.attrs.kind).toBe('type'); });
+  it('extracts method as child of class', () => { expect(names(pf)).toContain('Start'); });
+  it('method kind=method', () => { expect(node(pf, 'Start')?.attrs.kind).toBe('method'); });
+  it('extracts constructor', () => { expect(names(pf)).toContain('BaseService'); }); // NOTE(review): same assertion as 'extracts class'; it cannot tell the constructor node from the class node — confirm how the mapper names constructors and tighten
+});
+
+// ---------------------------------------------------------------------------
+// C / C++
+// ---------------------------------------------------------------------------
+
+describe('cpp tree-sitter mapper', () => {
+  const src = `namespace net {
+  class Server {
+  public:
+    void start();
+  };
+}
+
+int add(int a, int b) {
+  return a + b;
+}
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.cpp', src); });
+
+  it('extracts namespace', () => { expect(names(pf)).toContain('net'); });
+  it('namespace kind=interface', () => { expect(node(pf, 'net')?.attrs.kind).toBe('interface'); });
+  it('extracts class', () => { expect(names(pf)).toContain('Server'); });
+  it('class kind=class', () => { expect(node(pf, 'Server')?.attrs.kind).toBe('class'); });
+  it('extracts top-level function', () => { expect(names(pf)).toContain('add'); });
+  it('function kind=function', () => { expect(node(pf, 'add')?.attrs.kind).toBe('function'); });
+});
+
+// 
---------------------------------------------------------------------------
+// Bash
+// ---------------------------------------------------------------------------
+
+describe('bash tree-sitter mapper', () => {
+  const src = `#!/usr/bin/env bash
+
+deploy() {
+  echo "deploying..."
+}
+
+function rollback {
+  echo "rolling back"
+}
+
+main() {
+  deploy
+  rollback
+}
+
+main "$@"
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.sh', src); });
+
+  it('extracts posix-style function', () => { expect(names(pf)).toContain('deploy'); });
+  it('extracts function-keyword function', () => { expect(names(pf)).toContain('rollback'); });
+  it('extracts main', () => { expect(names(pf)).toContain('main'); });
+  it('function kind=function', () => { expect(node(pf, 'deploy')?.attrs.kind).toBe('function'); });
+});
+
+// ---------------------------------------------------------------------------
+// GDScript
+// ---------------------------------------------------------------------------
+
+describe('gdscript tree-sitter mapper', () => {
+  const src = `class_name Player extends CharacterBody2D
+
+signal health_changed(new_health: int)
+
+enum State { IDLE, RUNNING, JUMPING }
+
+const MAX_SPEED: float = 200.0
+
+var health: int = 100
+
+func _ready() -> void:
+\tpass
+
+func take_damage(amount: int) -> void:
+\thealth -= amount
+
+class Weapon:
+\tvar damage: int = 10
+\t
+\tfunc fire() -> void:
+\t\tpass
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.gd', src); });
+
+  it('extracts class_name as class', () => { expect(names(pf)).toContain('Player'); });
+  it('class_name kind=class', () => { expect(node(pf, 'Player')?.attrs.kind).toBe('class'); });
+  it('extracts signal', () => { expect(names(pf)).toContain('health_changed'); });
+  it('extracts enum', () => { expect(names(pf)).toContain('State'); });
+  it('enum kind=enum', () => { expect(node(pf, 'State')?.attrs.kind).toBe('enum'); });
+  it('extracts const', () => { expect(names(pf)).toContain('MAX_SPEED'); });
+  it('extracts var', () => { expect(names(pf)).toContain('health'); });
+  it('extracts function', () => { expect(names(pf)).toContain('take_damage'); });
+  it('function kind=function', () => { expect(node(pf, 'take_damage')?.attrs.kind).toBe('function'); });
+  it('extracts inner class', () => { expect(names(pf)).toContain('Weapon'); });
+  it('inner class kind=class', () => { expect(node(pf, 'Weapon')?.attrs.kind).toBe('class'); });
+  it('extends edge Player→CharacterBody2D', () => {
+    expect(pf.edges.some(e => e.attrs.kind === 'extends' && e.from.includes('Player') && e.to.includes('CharacterBody2D'))).toBe(true);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Godot scene (.tscn) — regex mapper
+// ---------------------------------------------------------------------------
+
+describe('godot-scene regex mapper', () => {
+  const src = `[gd_scene load_steps=3 format=3]
+
+[ext_resource type="Script" path="res://scripts/player.gd" id="1_abc"]
+
+[node name="Player" type="CharacterBody2D"]
+script = ExtResource("1_abc")
+
+[node name="Sprite2D" type="Sprite2D" parent="."]
+
+[node name="Hitbox" type="CollisionShape2D" parent="HUD/Container"]
+
+[sub_resource type="CapsuleShape2D" id="shape_1"]
+
+[connection signal="body_entered" from="Player" to="Player" method="_on_body_entered"]
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.tscn', src); });
+
+  it('extracts root node as class', () => { expect(node(pf, 'Player')?.attrs.kind).toBe('class'); });
+  it('extracts child node as variable', () => { expect(node(pf, 'Sprite2D')?.attrs.kind).toBe('variable'); });
+  it('extracts deeply nested node with path', () => { expect(names(pf)).toContain('HUD/Container/Hitbox'); });
+  it('extracts sub_resource', () => { expect(names(pf).some(n => n.includes('CapsuleShape2D'))).toBe(true); });
+  it('extracts connection as variable', () => { expect(names(pf).some(n => n.includes('body_entered'))).toBe(true); });
+});
+
+// ---------------------------------------------------------------------------
+// Godot resource (.tres) — regex mapper
+// ---------------------------------------------------------------------------
+
+describe('godot-resource regex mapper', () => {
+  const src = `[gd_resource type="PhysicsMaterial" load_steps=2 format=3]
+
+[ext_resource type="Texture2D" path="res://textures/ground.png" id="1_xyz"]
+
+[sub_resource type="CurveTexture" id="curve_1"]
+
+[resource]
+friction = 0.7
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.tres', src); });
+
+  it('extracts resource type as class', () => { expect(names(pf).some(n => n.includes('PhysicsMaterial'))).toBe(true); });
+  it('extracts sub_resource', () => { expect(names(pf).some(n => n.includes('curve_1'))).toBe(true); });
+});
+
+// ---------------------------------------------------------------------------
+// Godot project (project.godot) — regex mapper
+// ---------------------------------------------------------------------------
+
+describe('godot-project regex mapper', () => {
+  const src = `config_version=5
+
+[application]
+config/name="My Game"
+run/main_scene="res://scenes/main.tscn"
+
+[rendering]
+renderer/rendering_method="forward_plus"
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.godot', src); });
+
+  it('extracts [application] section', () => { expect(names(pf)).toContain('application'); });
+  it('extracts [rendering] section', () => { expect(names(pf)).toContain('rendering'); });
+});
+
+// ---------------------------------------------------------------------------
+// Godot extension (.gdextension) — regex mapper
+// ---------------------------------------------------------------------------
+
+describe('gdextension regex mapper', () => {
+  const src = `[configuration]
+entry_symbol = "example_library_init"
+compatibility_minimum = "4.1"
+
+[libraries]
+linux.x86_64 = "res://bin/example.so"
+windows.x86_64 = "res://bin/example.dll"
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.gdextension', src); });
+
+  it('extracts [configuration] section', () => { expect(names(pf)).toContain('configuration'); });
+  it('extracts [libraries] section', () => { expect(names(pf)).toContain('libraries'); });
+});
+
+// ---------------------------------------------------------------------------
+// C# — extended: namespace, enum, field, property, extends edge
+// ---------------------------------------------------------------------------
+
+describe('csharp tree-sitter mapper (extended)', () => {
+  const src = `using System;
+
+public class Repository {
+    private string conn;
+    public int Timeout { get; set; }
+
+    public void Connect() {}
+}
+
+namespace MyApp {
+    public class Service {}
+}
+
+public enum Direction {
+    North,
+    South
+}
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.cs', src); });
+
+  it('extracts top-level class', () => { expect(names(pf)).toContain('Repository'); });
+  it('extracts property as variable', () => { expect(names(pf)).toContain('Timeout'); });
+  it('extracts method as child of class', () => { expect(names(pf)).toContain('Connect'); });
+  it('extracts namespace', () => { expect(names(pf)).toContain('MyApp'); });
+  it('namespace kind=interface', () => { expect(node(pf, 'MyApp')?.attrs.kind).toBe('interface'); });
+  it('extracts class inside namespace', () => { expect(names(pf)).toContain('Service'); });
+  it('extracts enum', () => { expect(names(pf)).toContain('Direction'); });
+  it('enum kind=enum', () => { expect(node(pf, 'Direction')?.attrs.kind).toBe('enum'); });
+});
+
+// ---------------------------------------------------------------------------
+// C++ — extended: class with method body, inheritance, template, enum
+// ---------------------------------------------------------------------------
+
+describe('cpp tree-sitter mapper (extended)', () => {
+  const src = `class Animal {
+public:
+  virtual void speak() {}
+};
+
+class Dog : Animal {
+public:
+  void speak() override {}
+};
+
+template <typename T>
+class Box {
+};
+
+enum Color { Red, Green, Blue };
+`;
+
+  let pf: ParsedFile;
+  beforeAll(async () => { pf = await parse('.cpp', src); });
+
+  it('extracts base class', () => { expect(names(pf)).toContain('Animal'); });
+  it('extracts derived class', () => { expect(names(pf)).toContain('Dog'); });
+  it('extracts class method as child', () => { expect(names(pf)).toContain('speak'); });
+  it('method kind=method', () => { expect(node(pf, 'speak')?.attrs.kind).toBe('method'); });
+  it('extracts template class', () => { expect(names(pf)).toContain('Box'); });
+  it('extracts enum', () => { expect(names(pf)).toContain('Color'); });
+  it('enum kind=enum', () => { expect(node(pf, 'Color')?.attrs.kind).toBe('enum'); });
+});
diff --git a/wasm/tree-sitter-gdscript.wasm b/wasm/tree-sitter-gdscript.wasm
new file mode 100755
index 0000000..232f882
Binary files /dev/null and b/wasm/tree-sitter-gdscript.wasm differ