From b7ce95f67e19c6e7bf80f506451e535188035f23 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Tue, 2 Jun 2026 12:43:08 -0400 Subject: [PATCH] fix: harden document fallback paths --- src/documentLinks/markdown.ts | 258 +++++++++++-- src/graph-edge-collector.ts | 15 +- src/graphs/symbol-graph-detailed.ts | 6 +- src/indexer/build-index.ts | 4 + src/indexer/build-workers.ts | 6 +- src/indexer/locals-and-exports.ts | 19 +- src/indexer/parse-context.ts | 19 +- src/native/runtime.ts | 6 + src/native/treeSitterNative.ts | 1 + tests/document-links.test.ts | 470 +++++++++++++++++++++++ tests/fallback-import-extraction.test.ts | 46 ++- tests/native-fallback-contract.test.ts | 173 +++++++++ 12 files changed, 979 insertions(+), 44 deletions(-) diff --git a/src/documentLinks/markdown.ts b/src/documentLinks/markdown.ts index 69ace284..0c11917b 100644 --- a/src/documentLinks/markdown.ts +++ b/src/documentLinks/markdown.ts @@ -8,6 +8,9 @@ import { normalizeReferenceLabel, } from "./shared.js"; +const MAX_MARKDOWN_REFERENCE_LABEL_SCAN_LENGTH = 999; +const MAX_MARKDOWN_INLINE_LABEL_SCAN_LENGTH = Number.POSITIVE_INFINITY; + export function extractMarkdownModuleSpecifiers(source: string): ModuleSpecifier[] { const sanitized = stripMarkdownCode(source); return extractMarkdownModuleSpecifiersFromSanitized(sanitized); @@ -25,21 +28,12 @@ function extractMarkdownModuleSpecifiersFromSanitized(sanitized: string): Module if (normalized) out.push(normalized); } - for (const match of sanitized.matchAll(/!?\[([^\]]+)\]\[([^\]]*)\]/g)) { - const fullMatch = match[0] ?? ""; - if (fullMatch.startsWith("!")) continue; - const text = match[1]?.trim(); - const label = match[2]?.trim(); - const resolvedLabel = normalizeReferenceLabel(label || text); - if (!resolvedLabel) continue; - const destination = referenceDefs.get(resolvedLabel); - if (!destination) continue; - out.push(destination); - } + out.push(...collectMarkdownReferenceLinkSpecifiers(sanitized, referenceDefs)); for (const match of sanitized.matchAll(/<([^>\s]+)>/g)) { const candidate = match[1]?.trim(); if (!candidate) continue; + if (match.index !== undefined && isMarkdownAngleDestinationInLinkSyntax(sanitized, match.index)) continue; if (candidate.startsWith("/") || candidate.startsWith("?")) continue; if (!isLikelyMarkdownAutolinkTarget(candidate)) continue; const normalized = normalizeLinkSpecifier(candidate, { @@ -67,22 +61,133 @@ export function extractMdxModuleSpecifiers(source: string): ModuleSpecifier[] { function collectMarkdownReferenceDefinitions(source: string): Map { const out = new Map(); - const definitionRe = /^\s{0,3}\[([^\]]+)\]:\s*(<[^>\n]+>|[^ \t\n]+)(?:[ \t]+(?:"[^"]*"|'[^']*'|\([^)]*\)))?\s*$/gm; - for (const match of source.matchAll(definitionRe)) { - const label = normalizeReferenceLabel(match[1]); - const rawDestination = match[2]; - if (!label || !rawDestination) continue; + for (let lineStart = 0; lineStart < source.length; lineStart += 1) { + const lineEnd = source.indexOf("\n", lineStart); + const endIndex = lineEnd >= 0 ? lineEnd : source.length; + const line = source.slice(lineStart, endIndex); + const leading = line.match(/^ {0,3}/)?.[0] ?? ""; + const labelStart = leading.length; + if (line[labelStart] !== "[") { + lineStart = endIndex; + continue; + } + + const absoluteLabelStart = lineStart + labelStart; + const labelEnd = findMarkdownLabelEnd(source, absoluteLabelStart + 1, MAX_MARKDOWN_REFERENCE_LABEL_SCAN_LENGTH); + if (labelEnd < 0 || labelEnd > lineStart + line.length || source[labelEnd + 1] !== ":") { + lineStart = endIndex; + continue; + } + + const label = normalizeReferenceLabel(source.slice(absoluteLabelStart + 1, labelEnd)); + if (!label) { + lineStart = endIndex; + continue; + } + + const rawDestination = parseMarkdownReferenceDefinitionDestination(source.slice(labelEnd + 2, endIndex)); + if (!rawDestination) { + lineStart = endIndex; + continue; + } const normalized = normalizeLinkSpecifier(rawDestination, { preferRelative: true, resolutionKind: "document", }); if (normalized) out.set(label, normalized); + lineStart = endIndex; } return out; } +function collectMarkdownReferenceLinkSpecifiers( + source: string, + referenceDefs: ReadonlyMap, +): ModuleSpecifier[] { + const out: ModuleSpecifier[] = []; + + for (let index = 0; index < source.length; index += 1) { + if (source[index] !== "[") continue; + if (source[index - 1] === "!") { + const labelEnd = findMarkdownLabelEnd(source, index + 1, MAX_MARKDOWN_INLINE_LABEL_SCAN_LENGTH); + if (labelEnd < 0) { + index = skipConsecutiveMarkdownOpeners(source, index); + continue; + } + const suffix = parseMarkdownReferenceSuffix(source, labelEnd + 1); + if (suffix) { + index = suffix.endIndex; + continue; + } + if (source[labelEnd + 1] === "(") { + const parsed = parseMarkdownInlineLink(source, labelEnd + 2); + index = parsed?.endIndex ?? labelEnd; + continue; + } + index = labelEnd; + continue; + } + const inlineLabelEnd = findMarkdownLabelEnd(source, index + 1, MAX_MARKDOWN_INLINE_LABEL_SCAN_LENGTH); + if (inlineLabelEnd >= 0 && source[inlineLabelEnd + 1] === "(") { + const parsed = parseMarkdownInlineLink(source, inlineLabelEnd + 2); + if (parsed) { + index = parsed.endIndex; + continue; + } + if (isEmptyMarkdownInlineDestination(source, inlineLabelEnd + 2)) { + index = findEmptyMarkdownInlineDestinationEnd(source, inlineLabelEnd + 2); + continue; + } + } + + const labelEnd = findMarkdownLabelEnd(source, index + 1, MAX_MARKDOWN_REFERENCE_LABEL_SCAN_LENGTH); + if (labelEnd < 0) { + index = skipConsecutiveMarkdownOpeners(source, index); + continue; + } + + const suffix = parseMarkdownReferenceSuffix(source, labelEnd + 1); + if (!suffix && isMarkdownReferenceDefinitionLabel(source, index, labelEnd)) { + const lineEnd = source.indexOf("\n", labelEnd + 1); + index = lineEnd >= 0 ? lineEnd : source.length; + continue; + } + + const text = source.slice(index + 1, labelEnd).trim(); + const rawLabel = suffix ? suffix.label.trim() || text : text; + const resolvedLabel = normalizeReferenceLabel(rawLabel); + if (!resolvedLabel) continue; + + const destination = referenceDefs.get(resolvedLabel); + if (!destination) continue; + out.push(destination); + index = suffix?.endIndex ?? labelEnd; + } + + return out; +} + +function isMarkdownReferenceDefinitionLabel(source: string, labelStartIndex: number, labelEndIndex: number): boolean { + const lineStart = source.lastIndexOf("\n", labelStartIndex - 1) + 1; + const prefix = source.slice(lineStart, labelStartIndex); + if (!/^\s{0,3}$/.test(prefix)) return false; + const lineEnd = source.indexOf("\n", labelEndIndex + 1); + const suffixEnd = lineEnd >= 0 ? lineEnd : source.length; + return /^\s*:/.test(source.slice(labelEndIndex + 1, suffixEnd)); +} + +function parseMarkdownReferenceSuffix(source: string, startIndex: number): { label: string; endIndex: number } | null { + if (source[startIndex] !== "[") return null; + const labelEnd = findMarkdownLabelEnd(source, startIndex + 1, MAX_MARKDOWN_REFERENCE_LABEL_SCAN_LENGTH); + if (labelEnd < 0) return null; + return { + label: source.slice(startIndex + 1, labelEnd), + endIndex: labelEnd, + }; +} + function collectMarkdownInlineLinkDestinations(source: string): string[] { const out: string[] = []; @@ -90,9 +195,12 @@ function collectMarkdownInlineLinkDestinations(source: string): string[] { if (source[index] !== "[") continue; if (source[index - 1] === "!") continue; - const labelEnd = findMarkdownLabelEnd(source, index + 1); - if (labelEnd < 0 || source[labelEnd + 1] !== "(") continue; - + const labelEnd = findMarkdownLabelEnd(source, index + 1, MAX_MARKDOWN_INLINE_LABEL_SCAN_LENGTH); + if (labelEnd < 0) { + index = skipConsecutiveMarkdownOpeners(source, index); + continue; + } + if (source[labelEnd + 1] !== "(") continue; const parsed = parseMarkdownInlineLink(source, labelEnd + 2); if (!parsed) continue; @@ -103,20 +211,10 @@ function collectMarkdownInlineLinkDestinations(source: string): string[] { return out; } -function extractMarkdownDestination(rawDestination: string): string { - const trimmed = rawDestination.trim(); - if (!trimmed) return trimmed; - if (trimmed.startsWith("<")) { - const endIndex = trimmed.indexOf(">"); - if (endIndex > 0) return trimmed.slice(0, endIndex + 1); - } - const whitespaceIndex = trimmed.search(/\s/); - return whitespaceIndex >= 0 ? trimmed.slice(0, whitespaceIndex) : trimmed; -} - -function findMarkdownLabelEnd(source: string, openIndex: number): number { +function findMarkdownLabelEnd(source: string, openIndex: number, maxLength: number): number { let depth = 0; - for (let index = openIndex; index < source.length; index += 1) { + const maxIndex = Math.min(source.length, openIndex + maxLength + 1); + for (let index = openIndex; index < maxIndex; index += 1) { const char = source.charAt(index); if (char === "\\") { index += 1; @@ -133,6 +231,102 @@ function findMarkdownLabelEnd(source: string, openIndex: number): number { return -1; } +function skipConsecutiveMarkdownOpeners(source: string, startIndex: number): number { + let index = startIndex; + while (source[index + 1] === "[") { + index += 1; + } + return index; +} + +function parseMarkdownReferenceDefinitionDestination(rawTail: string): string | null { + const trimmed = rawTail.trim(); + if (!trimmed) return null; + + let destination = ""; + let remainder = ""; + if (trimmed.startsWith("<")) { + const endIndex = trimmed.indexOf(">"); + if (endIndex <= 0) return null; + destination = trimmed.slice(0, endIndex + 1); + remainder = trimmed.slice(endIndex + 1).trim(); + } else { + const whitespaceIndex = trimmed.search(/\s/); + if (whitespaceIndex < 0) { + return trimmed; + } + destination = trimmed.slice(0, whitespaceIndex); + remainder = trimmed.slice(whitespaceIndex).trim(); + } + + if (!remainder) return destination; + return isValidMarkdownReferenceTitle(remainder) ? destination : null; +} + +function isValidMarkdownReferenceTitle(remainder: string): boolean { + const opener = remainder.charAt(0); + if (opener === '"' || opener === "'") { + return closesAtEnd(remainder, opener); + } + if (!remainder.startsWith("(")) return false; + return remainder.indexOf(")") === remainder.length - 1; +} + +function closesAtEnd(value: string, delimiter: string): boolean { + for (let index = 1; index < value.length; index += 1) { + const char = value.charAt(index); + if (char === "\\") { + index += 1; + continue; + } + if (char !== delimiter) continue; + return index === value.length - 1; + } + return false; +} + +function isEmptyMarkdownInlineDestination(source: string, startIndex: number): boolean { + return findEmptyMarkdownInlineDestinationEnd(source, startIndex) >= 0; +} + +function findEmptyMarkdownInlineDestinationEnd(source: string, startIndex: number): number { + for (let index = startIndex; index < source.length; index += 1) { + const char = source.charAt(index); + if (char === ")") return index; + if (char === "\n") return -1; + if (!/\s/.test(char)) return -1; + } + return -1; +} + +function isMarkdownAngleDestinationInLinkSyntax(source: string, matchIndex: number): boolean { + let index = matchIndex - 1; + while (index >= 0 && /\s/.test(source.charAt(index))) { + if (source.charAt(index) === "\n") return false; + index -= 1; + } + if (index >= 1 && source.charAt(index) === "(" && source.charAt(index - 1) === "]") { + return true; + } + + const lineStart = source.lastIndexOf("\n", matchIndex - 1) + 1; + const labelOpen = lineStart + (source.slice(lineStart).match(/^ {0,3}/)?.[0].length ?? 0); + if (source[labelOpen] !== "[") return false; + const labelEnd = findMarkdownLabelEnd(source, labelOpen + 1, MAX_MARKDOWN_REFERENCE_LABEL_SCAN_LENGTH); + return labelEnd >= 0 && labelEnd < matchIndex && /^\s*:\s*$/.test(source.slice(labelEnd + 1, matchIndex)); +} + +function extractMarkdownDestination(rawDestination: string): string { + const trimmed = rawDestination.trim(); + if (!trimmed) return trimmed; + if (trimmed.startsWith("<")) { + const endIndex = trimmed.indexOf(">"); + if (endIndex > 0) return trimmed.slice(0, endIndex + 1); + } + const whitespaceIndex = trimmed.search(/\s/); + return whitespaceIndex >= 0 ? trimmed.slice(0, whitespaceIndex) : trimmed; +} + function parseMarkdownInlineLink(source: string, startIndex: number): { destination: string; endIndex: number } | null { let depth = 1; let destinationEnd = -1; diff --git a/src/graph-edge-collector.ts b/src/graph-edge-collector.ts index 6064fb09..df3bd08c 100644 --- a/src/graph-edge-collector.ts +++ b/src/graph-edge-collector.ts @@ -13,6 +13,7 @@ import { isGraphOnlyLanguage, } from "./documentLinks.js"; import { + assertNativeRequiredAvailable, getCompactImportsExecution, type NativeRuntimeMode, type CompactQueryResults, @@ -82,6 +83,8 @@ export async function collectEdgesForFile( const matchesGitSig = !!gitSig && !!cached?.gitSig && cached.gitSig === gitSig; const matchesSig = !!sig && !!cached && cached.sig === sig; + assertNativeRequiredAvailable(opts.native); + if (cached && (matchesGitSig || matchesSig)) { const cloned = cached.edges.map(cloneEdge); emitCacheEntry(cloned); @@ -94,13 +97,21 @@ export async function collectEdgesForFile( let src = parsed?.source; let nativeQueries = parsed?.nativeQueries ?? null; let compactNativeImports: CompactQueryResults | null = null; + let graphOnlyLanguage = sup ? isGraphOnlyLanguage(sup.id) : false; + if (sup && graphOnlyLanguage) { + assertNativeRequiredAvailable(opts.native); + } if (!sup || src === undefined) { const prep = await prepareSourceInput(file); sup = prep.sup; src = prep.source; + graphOnlyLanguage = isGraphOnlyLanguage(sup.id); + if (graphOnlyLanguage) { + assertNativeRequiredAvailable(opts.native); + } const fastRegexDisabled = opts.fastRegexDisabledLanguages?.includes(sup.id); const shouldSkipNativeForFastGraph = !!opts.fast && (sup.id === "ts" || sup.id === "js") && !fastRegexDisabled; - if (!shouldSkipNativeForFastGraph) { + if (!graphOnlyLanguage && !shouldSkipNativeForFastGraph) { // Use compact imports execution for graph mode -- smaller payload const compactExecution = getCompactImportsExecution(src, sup, opts.native); compactNativeImports = compactExecution.results; @@ -146,7 +157,7 @@ export async function collectEdgesForFile( } } - const graphOnlyAliasLanguage = graphOnlyLanguageSupportsImportAliases(sup.id); + const graphOnlyAliasLanguage = graphOnlyLanguage && graphOnlyLanguageSupportsImportAliases(sup.id); const needsGraphOnlyResolutionConfig = graphOnlyAliasLanguage && specs.some(({ spec }) => graphOnlySpecifierNeedsResolutionConfig(spec)); const { matchPath } = diff --git a/src/graphs/symbol-graph-detailed.ts b/src/graphs/symbol-graph-detailed.ts index b9f9e408..c1b5d5cd 100644 --- a/src/graphs/symbol-graph-detailed.ts +++ b/src/graphs/symbol-graph-detailed.ts @@ -4,7 +4,7 @@ import { isUnsupportedParserInputError, prepareSourceInput } from "../languages/ import type { SyntaxTreeLike } from "../languages/types.js"; import { logWithLevel, type LogLevel } from "../logging.js"; import { ProjectedSyntaxTree } from "../native/projectedTree.js"; -import { getNativeSyntaxTreeExecution } from "../native/treeSitterNative.js"; +import { assertNativeRequiredAvailable, getNativeSyntaxTreeExecution, isNativeRequiredUnavailableError } from "../native/treeSitterNative.js"; import { SymbolKind, type ProjectIndex, type ResolvedExport, type SymbolDef } from "../indexer/types.js"; import type { FileId } from "../types.js"; import { buildSymbolGraph, type SymbolGraph } from "./symbol-graph.js"; @@ -34,6 +34,7 @@ export async function buildSymbolGraphDetailed( index: ProjectIndex, opts?: BuildDetailedSymbolGraphOptions, ): Promise { + assertNativeRequiredAvailable(index.nativeMode); const base = await buildSymbolGraph(index, opts?.files ? { files: opts.files } : undefined); const nodes = new Map(base.nodes); const edges = base.edges.slice(); @@ -274,6 +275,9 @@ export async function buildSymbolGraphDetailed( emitClassInheritanceEdges(edgePassContext, classNodes); emitRustImplEdges(edgePassContext, tree.rootNode); } catch (error) { + if (isNativeRequiredUnavailableError(error)) { + throw error; + } if (isUnsupportedParserInputError(error)) { continue; } diff --git a/src/indexer/build-index.ts b/src/indexer/build-index.ts index 9c8b3e9c..16a3bcaf 100644 --- a/src/indexer/build-index.ts +++ b/src/indexer/build-index.ts @@ -14,6 +14,7 @@ import { collectEdgesForFile } from "../graph-edge-collector.js"; import { buildGraphAdjacency } from "../graphs/adjacency.js"; import type { FallbackImportExtractionEvent } from "../graphs/specifiers.js"; import type { GraphBuildOptions, GraphCacheEntry } from "../graphs/types.js"; +import { assertNativeRequiredAvailable } from "../native/treeSitterNative.js"; import { isGraphOnlyLanguage } from "../documentLinks.js"; import { attemptParsePreparedFileContext, type ParsedFileContext } from "./parse-context.js"; import { collectImportsForFile } from "./imports.js"; @@ -395,6 +396,7 @@ function createIndexBuildRunState( graphOptions = normalizeGraphOptions(opts?.graph), ): IndexBuildRunState { const report = opts?.report; + assertNativeRequiredAvailable(opts?.native); initNativeBackendReport(report); const cacheMode = opts?.cache ?? "off"; return { @@ -573,6 +575,7 @@ async function buildIndexFromFileListShared( : {}), resolveNodeModules: !!graphOptions.resolveNodeModules, dynamicImportHeuristics: !!graphOptions.dynamicImportHeuristics, + ...(opts?.native ? { native: opts.native } : {}), ...(opts?.logLevel ? { logLevel: opts.logLevel } : {}), ...(graphOptions.resolutionHints ? { resolutionHints: graphOptions.resolutionHints } : {}), fileSignature: sigInfo, @@ -624,6 +627,7 @@ async function buildIndexFromFileListShared( : {}), resolveNodeModules: !!graphOptions.resolveNodeModules, dynamicImportHeuristics: !!graphOptions.dynamicImportHeuristics, + ...(opts?.native ? { native: opts.native } : {}), ...(opts?.logLevel ? { logLevel: opts.logLevel } : {}), ...(graphOptions.resolutionHints ? { resolutionHints: graphOptions.resolutionHints } : {}), fileSignature: sigInfo, diff --git a/src/indexer/build-workers.ts b/src/indexer/build-workers.ts index e84e27e7..73e69696 100644 --- a/src/indexer/build-workers.ts +++ b/src/indexer/build-workers.ts @@ -1,4 +1,5 @@ import { performance } from "node:perf_hooks"; +import { isGraphOnlyLanguage } from "../documentLinks.js"; import type { LanguageSupport } from "../languages.js"; import { stringifyUnknown } from "../util/ast.js"; import { recordNativeExecutionOutcome } from "../native/nativeBackendReport.js"; @@ -108,7 +109,7 @@ export async function prepareFileContextForBuild( report: BuildReport | undefined, ): Promise { let prepared: PreparedFileContext; - if (workerSetup.pool && !isSFCFile(file)) { + if (workerSetup.pool && !isSFCFile(file) && !isGraphOnlyLanguage(support.id)) { if (workerSetup.report) workerSetup.report.tasksSubmitted++; try { const workerResult: NativeExtractResult = await workerSetup.pool.run(buildWorkerTask(file, support)); @@ -130,6 +131,9 @@ export async function prepareFileContextForBuild( } else { prepared = await prepareFileForIndexing(file, opts?.native); } + if (isGraphOnlyLanguage(prepared.sup.id)) { + return prepared; + } recordNativeExecutionOutcome(report, { file, support: prepared.sup, diff --git a/src/indexer/locals-and-exports.ts b/src/indexer/locals-and-exports.ts index 9d253388..8ee68b9c 100644 --- a/src/indexer/locals-and-exports.ts +++ b/src/indexer/locals-and-exports.ts @@ -5,7 +5,9 @@ import { capturesByName, capturesNamed, rangeFromNativeCapture } from "../native import { ProjectedSyntaxTree } from "../native/projectedTree.js"; import { executeJsQueryAsNativeMatches, + assertNativeRequiredAvailable, getNativeSyntaxTreeExecution, + isNativeRequiredUnavailableError, isNativeBindingLoadedForLanguage, type NativeCapture, type NativeQueryResults, @@ -203,6 +205,7 @@ export function collectLocalsAndExportsFromSource( logLevel?: LogLevel; }, ): ModuleIndex { + assertNativeRequiredAvailable(opts?.nativeMode); if (isGraphOnlyLanguage(support.id)) { return { file, exports: [], imports, locals: [] }; } @@ -324,7 +327,8 @@ export function collectLocalsAndExportsFromSource( tree = parsedTree; jsQueryTree = parsedTree; jsQueryTreeAttempted = true; - } catch { + } catch (error) { + if (isNativeRequiredUnavailableError(error)) throw error; /* parse fallback: ignore */ } return tree; @@ -335,7 +339,8 @@ export function collectLocalsAndExportsFromSource( jsQueryTreeAttempted = true; try { jsQueryTree = parseWithJsLanguage(source, ensureResolvedLang()); - } catch { + } catch (error) { + if (isNativeRequiredUnavailableError(error)) throw error; /* parse fallback: ignore */ } return jsQueryTree; @@ -390,7 +395,8 @@ export function collectLocalsAndExportsFromSource( } } return true; - } catch { + } catch (error) { + if (isNativeRequiredUnavailableError(error)) throw error; return false; } }; @@ -420,6 +426,7 @@ export function collectLocalsAndExportsFromSource( } return true; } catch (error) { + if (isNativeRequiredUnavailableError(error)) throw error; logWithLevel(opts?.logLevel, "warn", `Warning: Query error in locals for ${support.id}:`, error); return false; } @@ -734,7 +741,8 @@ export function collectLocalsAndExportsFromSource( } } usedNativeExports = true; - } catch { + } catch (error) { + if (isNativeRequiredUnavailableError(error)) throw error; usedNativeExports = false; } } @@ -760,7 +768,8 @@ export function collectLocalsAndExportsFromSource( }); } } - } catch { + } catch (error) { + if (isNativeRequiredUnavailableError(error)) throw error; // fall through to regex fallback below } } diff --git a/src/indexer/parse-context.ts b/src/indexer/parse-context.ts index ab2d53a6..6b8c9801 100644 --- a/src/indexer/parse-context.ts +++ b/src/indexer/parse-context.ts @@ -1,6 +1,8 @@ import { parseWithJsLanguage } from "../jsFallback.js"; +import { isGraphOnlyLanguage } from "../documentLinks.js"; import { prepareSourceInput } from "../languages/filePrep.js"; import { + assertNativeRequiredAvailable, getNativeQueryExecution, getNativeSyntaxTreeExecution, type NativeQueryResults, @@ -48,7 +50,10 @@ export type PreparedFileParseAttempt = { export function attemptParsePreparedFileContext(context: PreparedFileContext): PreparedFileParseAttempt { const { file, source, sup, nativeMode, nativeQueries } = context; - const nativeTreeExecution = getNativeSyntaxTreeExecution(source, sup, nativeMode); + const graphOnlyLanguage = isGraphOnlyLanguage(sup.id); + const nativeTreeExecution = graphOnlyLanguage + ? { tree: null, fallbackReason: "unsupportedLanguage" as const } + : getNativeSyntaxTreeExecution(source, sup, nativeMode); if (nativeTreeExecution.tree) { return { parsed: { @@ -59,7 +64,6 @@ export function attemptParsePreparedFileContext(context: PreparedFileContext): P }, }; } - try { const resolvedLang = context.lang ?? sup.language(file); const tree = parseWithJsLanguage(source, resolvedLang); @@ -90,6 +94,17 @@ export function parsePreparedFileContext(context: PreparedFileContext): ParsedFi export async function prepareFileForIndexing(file: string, native?: NativeRuntimeMode): Promise { const prep = await prepareSourceInput(file); + if (isGraphOnlyLanguage(prep.sup.id)) { + assertNativeRequiredAvailable(native); + return { + file, + source: prep.source, + sup: prep.sup, + ...(native ? { nativeMode: native } : {}), + nativeQueries: null, + }; + } + const nativeExecution = getNativeQueryExecution(prep.source, prep.sup, native); return { diff --git a/src/native/runtime.ts b/src/native/runtime.ts index 133744ce..f1b48355 100644 --- a/src/native/runtime.ts +++ b/src/native/runtime.ts @@ -96,6 +96,12 @@ export function throwIfNativeRequiredUnavailable(mode: NativeRuntimeMode | undef throw new Error(`${NATIVE_REQUIRED_ERROR_PREFIX}${suffix}`); } +export function assertNativeRequiredAvailable(mode: NativeRuntimeMode | undefined): void { + if (normalizeNativeRuntimeMode(mode) !== "on") return; + const state = resolveNativeBindingState(mode); + throwIfNativeRequiredUnavailable(mode, state); +} + export function isNativeBindingLoadedForLanguage(languageId: string, mode?: NativeRuntimeMode): boolean { const state = resolveNativeBindingState(mode); return state.loaded && state.supportedLanguageIds.has(languageId); diff --git a/src/native/treeSitterNative.ts b/src/native/treeSitterNative.ts index 80af6bb2..c06953e5 100644 --- a/src/native/treeSitterNative.ts +++ b/src/native/treeSitterNative.ts @@ -21,6 +21,7 @@ export type { export { __resetNativeTreeSitterBindingForTests, + assertNativeRequiredAvailable, getNativeTreeSitterLoadError, getNativeTreeSitterSupportedLanguageIds, isNativeBindingLoadedForLanguage, diff --git a/tests/document-links.test.ts b/tests/document-links.test.ts index 16984b9d..c166f66a 100644 --- a/tests/document-links.test.ts +++ b/tests/document-links.test.ts @@ -91,6 +91,122 @@ describe("document link graph extraction", () => { ).toBe(true); }); + it("handles unmatched markdown labels without rescanning the rest of the document", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-unmatched-")); + const indexFile = path.join(root, "index.md"); + const guideFile = path.join(root, "guide.md"); + const unmatchedLabels = "[".repeat(5000); + + await fsp.writeFile(indexFile, `${unmatchedLabels}\n[Guide](./guide.md)\n`, "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + + const normalizedIndex = indexFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedIndex, normalizedGuide]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + }); + + it("handles same-line stale markdown labels before valid links", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-stale-same-line-")); + const indexFile = path.join(root, "index.md"); + const guideFile = path.join(root, "guide.md"); + + await fsp.writeFile(indexFile, "[broken [Guide](./guide.md)\n[broken [Guide][guide]\n\n[guide]: ./guide.md\n", "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + + const normalizedIndex = indexFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedIndex, normalizedGuide]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + }); + + it("keeps enclosing markdown links around nested image labels", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-image-label-")); + const indexFile = path.join(root, "index.md"); + const guideFile = path.join(root, "guide.md"); + const imageFile = path.join(root, "image.png"); + + await fsp.writeFile(indexFile, "[![Alt](./image.png)](./guide.md)\n", "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + await fsp.writeFile(imageFile, "png\n", "utf8"); + + const normalizedIndex = indexFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const normalizedImage = imageFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedIndex, normalizedGuide, normalizedImage]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedImage, + ), + ).toBe(false); + }); + + it("keeps enclosing markdown reference links around nested image labels", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-ref-image-label-")); + const indexFile = path.join(root, "index.md"); + const guideFile = path.join(root, "guide.md"); + const imageFile = path.join(root, "image.png"); + + await fsp.writeFile( + indexFile, + ["[![Alt][img]][guide]", "", "[img]: ./image.png", "[guide]: ./guide.md"].join("\n"), + "utf8", + ); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + await fsp.writeFile(imageFile, "png\n", "utf8"); + + const normalizedIndex = indexFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const normalizedImage = imageFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedIndex, normalizedGuide, normalizedImage]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedImage, + ), + ).toBe(false); + }); + + it("resolves markdown inline links with multiline labels", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-multiline-label-")); + const indexFile = path.join(root, "index.md"); + const guideFile = path.join(root, "guide.md"); + + await fsp.writeFile(indexFile, "[Guide\nlabel](./guide.md)\n", "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + + const normalizedIndex = indexFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedIndex, normalizedGuide]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + }); + it("ignores raw HTML and JSX tags in markdown-style autolinks", async () => { const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-mdx-")); const pageFile = path.join(root, "page.mdx"); @@ -119,6 +235,102 @@ describe("document link graph extraction", () => { ).toBe(false); }); + it("does not scan markdown inline destinations as reference links", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-inline-destination-ref-")); + const indexFile = path.join(root, "index.md"); + const guideFile = path.join(root, "guide-[ref].md"); + const otherFile = path.join(root, "other.md"); + + await fsp.writeFile(indexFile, ["[Guide](./guide-[ref].md)", "", "[ref]: ./other.md"].join("\n"), "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + await fsp.writeFile(otherFile, "# Other\n", "utf8"); + + const normalizedIndex = indexFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const normalizedOther = otherFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedIndex, normalizedGuide, normalizedOther]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedOther, + ), + ).toBe(false); + }); + + it("treats failed markdown inline destinations as shortcut references", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-failed-inline-shortcut-")); + const pageFile = path.join(root, "page.md"); + const guideFile = path.join(root, "guide.md"); + + await fsp.writeFile(pageFile, "[Guide](\n\n[Guide]: ./guide.md\n", "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedGuide]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + }); + + it("does not resolve whitespace-only empty markdown inline links as shortcuts", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-empty-inline-shortcut-")); + const pageFile = path.join(root, "page.md"); + const guideFile = path.join(root, "guide.md"); + + await fsp.writeFile(pageFile, "[Guide]( )\n\n[Guide]: ./guide.md\n", "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedGuide]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(false); + }); + + it("does not scan nested bracket text in markdown link labels as references", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-nested-label-ref-")); + const indexFile = path.join(root, "index.md"); + const guideFile = path.join(root, "guide.md"); + const otherFile = path.join(root, "other.md"); + + await fsp.writeFile( + indexFile, + ["[Text [Other] label](./guide.md)", "[Text [Other] label][guide]", "", "[Other]: ./other.md", "[guide]: ./guide.md"].join("\n"), + "utf8", + ); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + await fsp.writeFile(otherFile, "# Other\n", "utf8"); + + const normalizedIndex = indexFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const normalizedOther = otherFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedIndex, normalizedGuide, normalizedOther]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + expect( + graph.edges.some( + (edge) => edge.from === normalizedIndex && edge.to.type === "file" && edge.to.path === normalizedOther, + ), + ).toBe(false); + }); + it("ignores markdown email autolinks while keeping file autolinks", async () => { const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-email-")); const pageFile = path.join(root, "page.md"); @@ -196,6 +408,264 @@ describe("document link graph extraction", () => { ).toBe(false); }); + it("ignores standalone markdown reference image links", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-ref-image-")); + const pageFile = path.join(root, "page.md"); + const imageFile = path.join(root, "image.png"); + + await fsp.writeFile(pageFile, "![Alt][img]\n\n[img]: ./image.png\n", "utf8"); + await fsp.writeFile(imageFile, "png\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedImage = imageFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedImage]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedImage, + ), + ).toBe(false); + }); + + it("ignores shortcut markdown reference images with nested label text", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-shortcut-ref-image-")); + const pageFile = path.join(root, "page.md"); + const otherFile = path.join(root, "other.md"); + + await fsp.writeFile(pageFile, "![Alt [Other]]\n\n[Other]: ./other.md\n", "utf8"); + await fsp.writeFile(otherFile, "# Other\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedOther = otherFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedOther]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedOther, + ), + ).toBe(false); + }); + + it("ignores angle-bracket destinations in markdown images", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-angle-image-")); + const pageFile = path.join(root, "page.md"); + const imageFile = path.join(root, "image.png"); + + await fsp.writeFile(pageFile, "![Alt](<./image.png>)\n![Alt][img]\n\n[img]: <./image.png>\n", "utf8"); + await fsp.writeFile(imageFile, "png\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedImage = imageFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedImage]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedImage, + ), + ).toBe(false); + }); + + it("resolves long markdown labels and bounds stale reference suffix scans", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-long-label-")); + const pageFile = path.join(root, "page.md"); + const guideFile = path.join(root, "guide.md"); + const longLabel = "a".repeat(300); + const staleSuffixes = "[x][".repeat(1000); + + await fsp.writeFile(pageFile, `${staleSuffixes}\n[${longLabel}](./guide.md)\n`, "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedGuide]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + }); + + it("resolves maximum-length markdown reference suffix labels", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-max-ref-label-")); + const pageFile = path.join(root, "page.md"); + const guideFile = path.join(root, "guide.md"); + const otherFile = path.join(root, "other.md"); + const label = "a".repeat(999); + + await fsp.writeFile(pageFile, `[x][${label}]\n\n[x]: ./other.md\n[${label}]: ./guide.md\n`, "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + await fsp.writeFile(otherFile, "# Other\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const normalizedOther = otherFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedGuide, normalizedOther]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedOther, + ), + ).toBe(false); + }); + it("ignores bracket text inside markdown reference definition destinations", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-def-bracket-destination-")); + const pageFile = path.join(root, "page.md"); + const otherFile = path.join(root, "other.md"); + + await fsp.writeFile(pageFile, "[foo]: ./guide-[bar].md\n[bar]: ./other.md\n", "utf8"); + await fsp.writeFile(otherFile, "# Other\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedOther = otherFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedOther]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedOther, + ), + ).toBe(false); + }); + + it("resolves balanced bracket markdown reference labels", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-balanced-ref-label-")); + const pageFile = path.join(root, "page.md"); + const guideFile = path.join(root, "guide.md"); + + await fsp.writeFile(pageFile, "[x][foo [bar]]\n\n[foo [bar]]: ./guide.md\n", "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedGuide]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + }); + + it("ignores malformed markdown reference definitions with invalid titles", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-invalid-ref-title-")); + const pageFile = path.join(root, "page.md"); + const guideFile = path.join(root, "guide.md"); + + await fsp.writeFile(pageFile, "[bad][bad]\n\n[bad]: ./guide.md invalid title\n", "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedGuide]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(false); + }); + + it("ignores balanced-label markdown reference definitions with angle destinations", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-balanced-angle-dest-")); + const pageFile = path.join(root, "page.md"); + const imageFile = path.join(root, "image.png"); + + await fsp.writeFile(pageFile, "[foo [bar]]: <./image.png>\n", "utf8"); + await fsp.writeFile(imageFile, "png\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedImage = imageFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedImage]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedImage, + ), + ).toBe(false); + }); + + it("parseFile still supports graph-only markdown inputs", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-parse-markdown-")); + const pageFile = path.join(root, "page.md"); + await fsp.writeFile(pageFile, "[Guide](./guide.md)\n", "utf8"); + + const parsed = await import("../src/indexer.js").then((mod) => mod.parseFile(pageFile)); + expect(parsed.sup.id).toBe("markdown"); + expect(parsed.source).toContain("[Guide]"); + }); + + it("ignores markdown reference definitions with trailing title tokens", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-extra-ref-title-")); + const pageFile = path.join(root, "page.md"); + const guideFile = path.join(root, "guide.md"); + + await fsp.writeFile(pageFile, '[bad][bad]\n\n[bad]: ./guide.md "title" "extra"\n', "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedGuide]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(false); + }); + + it("ignores overlong markdown reference labels", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-overlong-ref-label-")); + const pageFile = path.join(root, "page.md"); + const guideFile = path.join(root, "guide.md"); + const label = "a".repeat(1000); + + await fsp.writeFile(pageFile, `[x][${label}]\n\n[${label}]: ./guide.md\n`, "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedGuide]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(false); + }); + + it("skips long markdown inline destinations before reference scanning", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-md-long-inline-destination-")); + const pageFile = path.join(root, "page.md"); + const guideFile = path.join(root, "guide-[ref].md"); + const otherFile = path.join(root, "other.md"); + const label = "a".repeat(1000); + + await fsp.writeFile(pageFile, `[${label}](./guide-[ref].md)\n\n[ref]: ./other.md\n`, "utf8"); + await fsp.writeFile(guideFile, "# Guide\n", "utf8"); + await fsp.writeFile(otherFile, "# Other\n", "utf8"); + + const normalizedPage = pageFile.replace(/\\/g, "/"); + const normalizedGuide = guideFile.replace(/\\/g, "/"); + const normalizedOther = otherFile.replace(/\\/g, "/"); + const graph = await collectGraph(root, [normalizedPage, normalizedGuide, normalizedOther]); + + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedGuide, + ), + ).toBe(true); + expect( + graph.edges.some( + (edge) => edge.from === normalizedPage && edge.to.type === "file" && edge.to.path === normalizedOther, + ), + ).toBe(false); + }); + it("ignores anchor-only asciidoc xrefs while keeping file references", async () => { const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-doc-links-adoc-")); const indexFile = path.join(root, "index.asciidoc"); diff --git a/tests/fallback-import-extraction.test.ts b/tests/fallback-import-extraction.test.ts index 595fd656..652bef02 100644 --- a/tests/fallback-import-extraction.test.ts +++ b/tests/fallback-import-extraction.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect } from "vitest"; +import { describe, it, expect, vi } from "vitest"; import path from "node:path"; import os from "node:os"; import fsp from "node:fs/promises"; @@ -8,6 +8,7 @@ import { getNativeTreeSitterSupportedLanguageIds, isNativeTreeSitterAvailable, } from "../src/native/treeSitterNative.js"; +import * as nativeRuntime from "../src/native/treeSitterNative.js"; async function mkTmpDir(prefix: string): Promise { return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); @@ -224,6 +225,49 @@ describe("Import extraction fallback reporting", () => { } }); + it("does not route graph-only documents through native query reporting", async () => { + const root = await mkTmpDir("cg-native-doc-report-"); + const page = path.join(root, "page.md"); + const guide = path.join(root, "guide.md"); + await fsp.writeFile(page, "[Guide](./guide.md)\n", "utf8"); + await fsp.writeFile(guide, "# Guide\n", "utf8"); + + const indexReport: BuildReport = { timings: {} }; + const graphReport: BuildReport = { timings: {} }; + const index = await buildProjectIndexFromFiles(root, [page, guide], { report: indexReport }); + const graph = await collectGraph(root, [page, guide], { report: graphReport }); + + expect(index.graph.edges.some((edge) => edge.to.type === "file" && edge.to.path === guide.replace(/\\/g, "/"))).toBe( + true, + ); + expect(graph.edges.some((edge) => edge.to.type === "file" && edge.to.path === guide.replace(/\\/g, "/"))).toBe( + true, + ); + expect(indexReport.backend?.native.byLanguage.markdown).toBeUndefined(); + expect(graphReport.backend?.native.byLanguage.markdown).toBeUndefined(); + }); + + it("honors required native availability for graph-only documents without reporting document queries", async () => { + const root = await mkTmpDir("cg-native-doc-required-"); + const page = path.join(root, "page.md"); + await fsp.writeFile(page, "[Guide](./guide.md)\n", "utf8"); + const requiredError = "native tree-sitter required by explicit option but unavailable"; + + vi.spyOn(nativeRuntime, "assertNativeRequiredAvailable").mockImplementation((mode) => { + if (mode !== "on") throw new Error(`unexpected native mode: ${String(mode)}`); + throw new Error(requiredError); + }); + await expect(buildProjectIndexFromFiles(root, [page], { native: "on" })).rejects.toThrow(requiredError); + vi.restoreAllMocks(); + + vi.spyOn(nativeRuntime, "assertNativeRequiredAvailable").mockImplementation((mode) => { + if (mode !== "on") throw new Error(`unexpected native mode: ${String(mode)}`); + throw new Error(requiredError); + }); + await expect(collectGraph(root, [page], { native: "on" })).rejects.toThrow(requiredError); + vi.restoreAllMocks(); + }); + it("avoids Python query-empty fallback warnings for __future__ imports", async () => { const root = await mkTmpDir("cg-python-future-import-"); const main = path.join(root, "main.py"); diff --git a/tests/native-fallback-contract.test.ts b/tests/native-fallback-contract.test.ts index c35a8a23..1c5ec15e 100644 --- a/tests/native-fallback-contract.test.ts +++ b/tests/native-fallback-contract.test.ts @@ -7,13 +7,18 @@ import { buildProjectIndex, buildProjectIndexFromFiles, buildProjectIndexIncremental, + buildSymbolGraphDetailed, collectImportsForFile, collectLocalsAndExportsFromSource, + SymbolKind, type BuildReport, type ModuleIndex, + type ProjectIndex, } from "../src/index.js"; import { prepareParserInput } from "../src/languages/filePrep.js"; import * as nativeRuntime from "../src/native/treeSitterNative.js"; +import { supportForFile } from "../src/languages.js"; +import type { NativeCapture, NativeQueryResults } from "../src/native/treeSitterNative.js"; const nativeDescribe = nativeRuntime.isNativeTreeSitterAvailable() ? describe : describe.skip; @@ -21,6 +26,16 @@ function normalizeFile(file: string): string { return path.resolve(file).replace(/\\/g, "/"); } +const REQUIRED_NATIVE_UNAVAILABLE = "native tree-sitter required by explicit option but unavailable"; + +const nativeCapture = (name: string, text: string): NativeCapture => ({ + name, + text, + nodeType: "identifier", + start: { row: 0, column: 0, index: 0 }, + end: { row: 0, column: text.length, index: text.length }, +}); + function simplifyModule(index: ModuleIndex): unknown { return { imports: index.imports.map((entry) => ({ @@ -135,6 +150,164 @@ afterEach(() => { vi.restoreAllMocks(); }); +describe("native required fallback boundaries", () => { + it("does not suppress required-native failures during locals enrichment", () => { + const file = normalizeFile(path.join(os.tmpdir(), "required-native.ts")); + const support = supportForFile(file); + expect(support).toBeDefined(); + if (!support) return; + + const nativeQueries: NativeQueryResults = { + imports: [], + exports: [], + locals: [ + { + patternIndex: 0, + captures: [nativeCapture("name", "alpha")], + }, + ], + importBindings: [], + }; + vi.spyOn(nativeRuntime, "assertNativeRequiredAvailable").mockImplementation((mode) => { + if (mode !== "on") throw new Error(`unexpected native mode: ${String(mode)}`); + }); + const syntaxSpy = vi + .spyOn(nativeRuntime, "getNativeSyntaxTreeExecution") + .mockImplementation((_source, _support, mode) => { + if (mode !== "on") throw new Error(`unexpected native mode: ${String(mode)}`); + throw new Error(REQUIRED_NATIVE_UNAVAILABLE); + }); + + expect(() => + collectLocalsAndExportsFromSource(file, "export const alpha = 1;\n", support, undefined, [], { + nativeMode: "on", + nativeQueries, + }), + ).toThrow(REQUIRED_NATIVE_UNAVAILABLE); + expect(syntaxSpy).toHaveBeenCalled(); + }); + + it("does not suppress required-native failures for graph-only local collection", () => { + const file = normalizeFile(path.join(os.tmpdir(), "required-native.md")); + const support = supportForFile(file); + expect(support).toBeDefined(); + if (!support) return; + + vi.spyOn(nativeRuntime, "assertNativeRequiredAvailable").mockImplementation((mode) => { + if (mode !== "on") throw new Error(`unexpected native mode: ${String(mode)}`); + throw new Error(REQUIRED_NATIVE_UNAVAILABLE); + }); + + expect(() => + collectLocalsAndExportsFromSource(file, "[Guide](./guide.md)\n", support, undefined, [], { + nativeMode: "on", + }), + ).toThrow(REQUIRED_NATIVE_UNAVAILABLE); + }); + + it("validates required-native mode before using supplied non-graph-only query data", () => { + const file = normalizeFile(path.join(os.tmpdir(), "required-native-supplied.ts")); + const support = supportForFile(file); + expect(support).toBeDefined(); + if (!support) return; + + const nativeQueries: NativeQueryResults = { + imports: [], + exports: [], + locals: [], + importBindings: [], + }; + vi.spyOn(nativeRuntime, "assertNativeRequiredAvailable").mockImplementation((mode) => { + if (mode !== "on") throw new Error(`unexpected native mode: ${String(mode)}`); + throw new Error(REQUIRED_NATIVE_UNAVAILABLE); + }); + + expect(() => + collectLocalsAndExportsFromSource(file, "export const alpha = 1;\n", support, undefined, [], { + nativeMode: "on", + nativeQueries, + }), + ).toThrow(REQUIRED_NATIVE_UNAVAILABLE); + }); + + it("does not suppress required-native failures during detailed symbol graph building", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-required-native-detailed-")); + const file = normalizeFile(path.join(root, "entry.ts")); + await fsp.writeFile(file, "export function alpha() { return 1; }\n", "utf8"); + const local = { + file, + localName: "alpha", + kind: SymbolKind.Function, + range: { + start: { line: 1, column: 16, index: 16 }, + end: { line: 1, column: 21, index: 21 }, + }, + }; + const moduleIndex: ModuleIndex = { + file, + exports: [{ type: "local", exportedAs: "alpha", target: local }], + imports: [], + locals: [local], + }; + const index: ProjectIndex = { + graph: { nodes: new Set([file]), edges: [] }, + modules: new Map([[file, moduleIndex]]), + byFile: new Map([[file, moduleIndex]]), + projectRoot: root, + nativeMode: "on", + exportCache: new Map(), + scopeCache: new Map(), + }; + vi.spyOn(nativeRuntime, "assertNativeRequiredAvailable").mockImplementation((mode) => { + if (mode !== "on") throw new Error(`unexpected native mode: ${String(mode)}`); + }); + const syntaxSpy = vi + .spyOn(nativeRuntime, "getNativeSyntaxTreeExecution") + .mockImplementation((_source, _support, mode) => { + if (mode !== "on") throw new Error(`unexpected native mode: ${String(mode)}`); + throw new Error(REQUIRED_NATIVE_UNAVAILABLE); + }); + + try { + await expect(buildSymbolGraphDetailed(index)).rejects.toThrow(REQUIRED_NATIVE_UNAVAILABLE); + expect(syntaxSpy).toHaveBeenCalled(); + } finally { + await fsp.rm(root, { recursive: true, force: true }); + } + }); + + it("does not suppress required-native failures for graph-only detailed symbol graph files", async () => { + const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-required-native-detailed-md-")); + const file = normalizeFile(path.join(root, "entry.md")); + await fsp.writeFile(file, "[Guide](./guide.md)\n", "utf8"); + const moduleIndex: ModuleIndex = { + file, + exports: [], + imports: [], + locals: [], + }; + const index: ProjectIndex = { + graph: { nodes: new Set([file]), edges: [] }, + modules: new Map([[file, moduleIndex]]), + byFile: new Map([[file, moduleIndex]]), + projectRoot: root, + nativeMode: "on", + exportCache: new Map(), + scopeCache: new Map(), + }; + vi.spyOn(nativeRuntime, "assertNativeRequiredAvailable").mockImplementation((mode) => { + if (mode !== "on") throw new Error(`unexpected native mode: ${String(mode)}`); + throw new Error(REQUIRED_NATIVE_UNAVAILABLE); + }); + + try { + await expect(buildSymbolGraphDetailed(index)).rejects.toThrow(REQUIRED_NATIVE_UNAVAILABLE); + } finally { + await fsp.rm(root, { recursive: true, force: true }); + } + }); +}); + nativeDescribe("native fallback contract", () => { it("falls back cleanly for one file without mixing native and JS extraction", async () => { const { root, alphaFile, betaFile } = await makeTempProject();