From 0ee270d6e2281fdefa4d71e2928f29498006c3a5 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 09:39:59 +0000 Subject: [PATCH 01/11] fix: allow special characters in anchor/alias names (2SXE, W5VH) Per YAML spec, anchor and alias names can contain colons and other special characters. Only terminate alias names at colon when followed by space/tab (key-value separator context). Also resolve aliases immediately in the lexer when the anchor exists, since deferred markers can be lost through Jsonic's rule processing for indented values. https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- src/yaml.ts | 27 +++++++++++++++++++++++---- test/yaml-test-suite.test.ts | 2 -- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/yaml.ts b/src/yaml.ts index 8f25a06..08ffea9 100644 --- a/src/yaml.ts +++ b/src/yaml.ts @@ -792,7 +792,13 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { while (nameEnd < fwd.length && fwd[nameEnd] !== ' ' && fwd[nameEnd] !== '\t' && fwd[nameEnd] !== '\n' && fwd[nameEnd] !== '\r' && fwd[nameEnd] !== ',' && fwd[nameEnd] !== '{' && fwd[nameEnd] !== '}' && fwd[nameEnd] !== '[' && - fwd[nameEnd] !== ']' && fwd[nameEnd] !== ':') nameEnd++ + fwd[nameEnd] !== ']') { + // Colon terminates only when followed by space/tab (key-value separator). + // Otherwise colon is a valid anchor-name character per YAML spec. + if (fwd[nameEnd] === ':' && + (fwd[nameEnd+1] === ' ' || fwd[nameEnd+1] === '\t')) break + nameEnd++ + } let name = fwd.substring(1, nameEnd) let src = fwd.substring(0, nameEnd) // Check if this alias is used as a map key (followed by ` :` or `:`). @@ -810,9 +816,22 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { pnt.cI += nameEnd return tkn } - // Store the alias name as a special marker object. 
- let marker = { __yamlAlias: name } - let tkn = lex.token('#VL', marker, src, lex.pnt) + // Resolve alias immediately if anchor exists, since deferred + // markers can be lost through Jsonic's rule processing. + let tkn: any + if (anchors[name] !== undefined) { + let val = anchors[name] + if (typeof val === 'object' && val !== null) { + val = JSON.parse(JSON.stringify(val)) + } + let tin = typeof val === 'string' ? '#TX' : + typeof val === 'number' ? '#NR' : '#VL' + tkn = lex.token(tin, val, src, lex.pnt) + } else { + // Anchor not yet seen — store marker for deferred resolution. + let marker = { __yamlAlias: name } + tkn = lex.token('#VL', marker, src, lex.pnt) + } pnt.sI += nameEnd pnt.cI += nameEnd return tkn diff --git a/test/yaml-test-suite.test.ts b/test/yaml-test-suite.test.ts index 0216183..b4ac5e4 100644 --- a/test/yaml-test-suite.test.ts +++ b/test/yaml-test-suite.test.ts @@ -27,7 +27,6 @@ const SUITE_DIR = join(__dirname, '..', 'test', 'yaml-test-suite') // or edge cases where Jsonic's base grammar conflicts with YAML semantics. // As parser coverage improves, entries should be removed and tests should pass. const SKIP: Record = { - '2SXE': 'output mismatch', '5WE3': 'parse error', '8KB6': 'parse error', '9BXH': 'parse error', @@ -38,7 +37,6 @@ const SKIP: Record = { 'K3WX': 'parse error', 'M5C3': 'parse error', 'P76L': 'output mismatch', - 'W5VH': 'output mismatch', } From 8fc6593a0986686dbd79e5d92b3c1e7caeee741a Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 09:42:12 +0000 Subject: [PATCH 02/11] fix: respect %TAG directive for custom !! handle (P76L) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parse %TAG directives during source cleanup and store handle→prefix mappings. When !! has been redefined by %TAG, skip built-in type conversion (!!int, !!float, etc.) and treat the value as a plain string. 
https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- src/yaml.ts | 37 +++++++++++++++++++++++++++--------- test/yaml-test-suite.test.ts | 1 - 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/yaml.ts b/src/yaml.ts index 08ffea9..79cf40f 100644 --- a/src/yaml.ts +++ b/src/yaml.ts @@ -34,6 +34,9 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { let pendingExplicitCL = false // Queue for tokens that need to be emitted across multiple lex calls. let pendingTokens: any[] = [] + // TAG directive handle mappings (e.g. %TAG !! tag:example.com/). + // When !! is redefined, built-in type conversion is skipped. + let tagHandles: Record = {} jsonic.options({ fixed: { @@ -650,6 +653,15 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { if (src[0] === '%') { let dIdx = src.indexOf('\n---') if (dIdx >= 0) { + // Parse %TAG directives before stripping. + let dirBlock = src.substring(0, dIdx) + let dirLines = dirBlock.split('\n') + for (let dl of dirLines) { + let tagMatch = dl.match(/^%TAG\s+(\S+)\s+(\S+)/) + if (tagMatch) { + tagHandles[tagMatch[1]] = tagMatch[2] + } + } hadDirective = true src = src.substring(dIdx + 1) } @@ -1072,10 +1084,12 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { if (fwd[valEnd] === q) valEnd++ let rawVal = fwd.substring(valStart + 1, valEnd - 1) let result: any = rawVal - if (tag === 'int') result = parseInt(rawVal, 10) - else if (tag === 'float') result = parseFloat(rawVal) - else if (tag === 'bool') result = rawVal === 'true' || rawVal === 'True' || rawVal === 'TRUE' - else if (tag === 'null') result = null + if (!tagHandles['!!']) { + if (tag === 'int') result = parseInt(rawVal, 10) + else if (tag === 'float') result = parseFloat(rawVal) + else if (tag === 'bool') result = rawVal === 'true' || rawVal === 'True' || rawVal === 'TRUE' + else if (tag === 'null') result = null + } if (tagAnchorName) anchors[tagAnchorName] = result let tknTin = typeof result === 'string' ? 
'#TX' : typeof result === 'number' ? '#NR' : '#VL' @@ -1111,11 +1125,16 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { } let rawVal = fwd.substring(valStart, valEnd).replace(/\s+$/, '') let result: any = rawVal - if (tag === 'str') result = String(rawVal) - else if (tag === 'int') result = parseInt(rawVal, 10) - else if (tag === 'float') result = parseFloat(rawVal) - else if (tag === 'bool') result = rawVal === 'true' || rawVal === 'True' || rawVal === 'TRUE' - else if (tag === 'null') result = null + // Only apply built-in type conversion when !! has not been + // redefined by a %TAG directive. Custom tag handles mean + // !!type is a user-defined tag, not a YAML core type. + if (!tagHandles['!!']) { + if (tag === 'str') result = String(rawVal) + else if (tag === 'int') result = parseInt(rawVal, 10) + else if (tag === 'float') result = parseFloat(rawVal) + else if (tag === 'bool') result = rawVal === 'true' || rawVal === 'True' || rawVal === 'TRUE' + else if (tag === 'null') result = null + } if (tagAnchorName) anchors[tagAnchorName] = result // Use #ST for empty strings (jsonic handles #ST better than // empty #TX in flow context), #NR for numbers, #VL for null. diff --git a/test/yaml-test-suite.test.ts b/test/yaml-test-suite.test.ts index b4ac5e4..88d0df5 100644 --- a/test/yaml-test-suite.test.ts +++ b/test/yaml-test-suite.test.ts @@ -36,7 +36,6 @@ const SKIP: Record = { 'JTV5': 'parse error', 'K3WX': 'parse error', 'M5C3': 'parse error', - 'P76L': 'output mismatch', } From 1c14f9ba47ef1d05d0c3b2e4a6ca855e271d7e84 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 09:47:00 +0000 Subject: [PATCH 03/11] fix: block scalar with explicit indent after standalone tag (M5C3) When a block scalar indicator (|2, >1) appears on a separate line from the mapping colon (e.g., after a standalone tag like !foo), look backward to find the parent mapping key's indent for correct blockIndent calculation. 
Also consume trailing newline after standalone local tags to prevent extra #IN tokens. https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- src/yaml.ts | 88 +++++++++++++++++++++++++++--------- test/yaml-test-suite.test.ts | 1 - 2 files changed, 67 insertions(+), 22 deletions(-) diff --git a/src/yaml.ts b/src/yaml.ts index 79cf40f..f33b8ad 100644 --- a/src/yaml.ts +++ b/src/yaml.ts @@ -133,43 +133,62 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { // indent of the containing block (e.g., the mapping key), which // may differ from the line's leading spaces (e.g., after "- "). if (explicitIndent > 0) { - // Find the column of the colon that precedes the block indicator. - // The key's indent level is the number of spaces before the key - // on this line, which accounts for "- " prefixes. + // Find the line containing the block indicator. let li = pnt.sI - 1 while (li > 0 && lex.src[li - 1] !== '\n' && lex.src[li - 1] !== '\r') li-- // li is now at the start of the line. Find the colon position. let keyCol = containingIndent + // Check if there's a colon on the SAME line as the block indicator. + let hasColonOnLine = false for (let ci = li + containingIndent; ci < pnt.sI; ci++) { if (lex.src[ci] === ':' && (lex.src[ci+1] === ' ' || lex.src[ci+1] === '\t')) { - // Key indent is the column of the first non-space after the - // sequence indicators. For "- aaa:", keyCol is 2 (after "- "). - // For " aaa:", keyCol is 2 (leading spaces). + hasColonOnLine = true break } } - // Check for sequence indicators: each "- " adds to the effective indent. - // But only when the block scalar is a value inside a mapping within - // the sequence (e.g., "- key: |2"), not when it's a direct value - // (e.g., "- |1"). Detect by checking for ": " between "- " and the - // block indicator. 
- let scanI = li + containingIndent - let hasColon = false - for (let ci = scanI; ci < pnt.sI; ci++) { - if (lex.src[ci] === ':' && (lex.src[ci+1] === ' ' || lex.src[ci+1] === '\t')) { - hasColon = true - break - } - } - if (hasColon) { + if (hasColonOnLine) { + // Block indicator on same line as colon (e.g., "key: |2"). + // Check for sequence indicators: each "- " adds to the effective indent. + let scanI = li + containingIndent while (scanI < pnt.sI && lex.src[scanI] === '-' && (lex.src[scanI+1] === ' ' || lex.src[scanI+1] === '\t')) { keyCol += 2 scanI += 2 while (scanI < pnt.sI && lex.src[scanI] === ' ') { keyCol++; scanI++ } } + blockIndent = keyCol + explicitIndent + } else { + // Block indicator on its own line (e.g., after a tag on + // a separate line). Look backward to find the parent + // mapping key's indent by scanning previous lines for + // the colon that started this value context. + let parentIndent = 0 + let searchI = li - 1 + while (searchI > 0) { + // Find start of previous line. + if (lex.src[searchI] === '\n') searchI-- + if (lex.src[searchI] === '\r') searchI-- + let prevLineEnd = searchI + 1 + while (searchI > 0 && lex.src[searchI - 1] !== '\n' && lex.src[searchI - 1] !== '\r') searchI-- + let prevLineStart = searchI + // Check if this line has a colon (mapping key). + for (let ci = prevLineStart; ci < prevLineEnd; ci++) { + if (lex.src[ci] === ':' && (lex.src[ci+1] === ' ' || lex.src[ci+1] === '\t' || + lex.src[ci+1] === '\n' || lex.src[ci+1] === '\r' || ci+1 >= prevLineEnd)) { + // Found the parent key line. Get its indent. + parentIndent = 0 + let pi = prevLineStart + while (pi < prevLineEnd && lex.src[pi] === ' ') { parentIndent++; pi++ } + break + } + } + break // Only check the immediately preceding non-blank line. + } + blockIndent = parentIndent + explicitIndent + // Update containingIndent to parent's indent so the + // "blockIndent <= containingIndent" check below works. 
+ containingIndent = parentIndent } - blockIndent = keyCol + explicitIndent } if (blockIndent <= containingIndent && !isDocStart && idx < fwd.length) { // Content is not indented enough — empty block scalar. @@ -996,6 +1015,33 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { if (fwd[tagEnd] === ' ') tagEnd++ // skip space after tag pnt.sI += tagEnd pnt.cI += tagEnd + // If tag is standalone (followed by newline), consume the + // newline and leading spaces so no extra #IN is emitted. + if (pnt.sI < lex.src.length && + (lex.src[pnt.sI] === '\n' || lex.src[pnt.sI] === '\r')) { + // Check if tag is standalone on its line. + let tagStandalone = true + let tagLineIndent = 0 + let bi = pnt.sI - tagEnd - 1 + while (bi >= 0 && lex.src[bi] !== '\n' && lex.src[bi] !== '\r') { + if (lex.src[bi] !== ' ' && lex.src[bi] !== '\t') { + tagStandalone = false + break + } + tagLineIndent++ + bi-- + } + if (tagStandalone) { + let nl = pnt.sI + if (lex.src[nl] === '\r') nl++ + if (lex.src[nl] === '\n') nl++ + let spaces = 0 + while (nl + spaces < lex.src.length && lex.src[nl + spaces] === ' ') spaces++ + pnt.sI = nl + spaces + pnt.cI = spaces + pnt.rI++ + } + } fwd = lex.src.substring(pnt.sI) // Restart matching to parse the value. 
continue yamlMatchLoop diff --git a/test/yaml-test-suite.test.ts b/test/yaml-test-suite.test.ts index 88d0df5..2042455 100644 --- a/test/yaml-test-suite.test.ts +++ b/test/yaml-test-suite.test.ts @@ -35,7 +35,6 @@ const SKIP: Record = { 'CT4Q': 'parse error', 'JTV5': 'parse error', 'K3WX': 'parse error', - 'M5C3': 'parse error', } From 57bf4c2757c6c7a2eda0d640a97c14d510cb691b Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 09:50:54 +0000 Subject: [PATCH 04/11] fix: plain scalar continuation vs sequence entry indent (AB8U) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a continuation line starts with "- ", check whether its indent matches the nearest enclosing sequence marker's indent. Only treat it as a new sequence entry at the matching indent level — at other indents, it's plain scalar text continuation. https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- src/yaml.ts | 28 ++++++++++++++++++++++++---- test/yaml-test-suite.test.ts | 1 - 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/yaml.ts b/src/yaml.ts index f33b8ad..111905f 100644 --- a/src/yaml.ts +++ b/src/yaml.ts @@ -545,10 +545,30 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { (fwd[i] === '.' && fwd[i+1] === '.' && fwd[i+2] === '.' && (fwd[i+3] === ' ' || fwd[i+3] === '\t' || fwd[i+3] === '\n' || fwd[i+3] === '\r' || fwd[i+3] === undefined))) - // Check for sequence marker "- ". - let isSeqMarker = fwd[i] === '-' && - (fwd[i+1] === ' ' || fwd[i+1] === '\t' || fwd[i+1] === '\n' || - fwd[i+1] === '\r' || fwd[i+1] === undefined) + // Check for sequence marker "- ". Only treat as a new sequence + // entry when the indent matches an enclosing sequence's level. + // Find the nearest "- " sequence marker preceding the text on + // the first line to determine the relevant sequence indent. 
+ let isSeqMarker = false + if (fwd[i] === '-' && + (fwd[i+1] === ' ' || fwd[i+1] === '\t' || fwd[i+1] === '\n' || + fwd[i+1] === '\r' || fwd[i+1] === undefined)) { + // Determine the sequence indent from the first line's context. + // Look backward from pnt.sI to find "- " markers before the text. + let seqIndent = -1 + let si = pnt.sI - 1 + while (si >= lineStart) { + if (lex.src[si] === '-' && (lex.src[si+1] === ' ' || lex.src[si+1] === '\t')) { + seqIndent = si - lineStart + break + } + si-- + } + // isSeqMarker if the continuation "- " matches a known sequence + // indent, or if it's at the current line indent level. + isSeqMarker = (seqIndent >= 0 && lineIndent === seqIndent) || + (seqIndent < 0 && lineIndent <= currentLineIndent) + } let canContinue = inFlowCtx ? (i < fwd.length && fwd[i] !== '\n' && fwd[i] !== '\r' && fwd[i] !== '#' && fwd[i] !== '{' && fwd[i] !== '}' && diff --git a/test/yaml-test-suite.test.ts b/test/yaml-test-suite.test.ts index 2042455..78666bc 100644 --- a/test/yaml-test-suite.test.ts +++ b/test/yaml-test-suite.test.ts @@ -31,7 +31,6 @@ const SKIP: Record = { '8KB6': 'parse error', '9BXH': 'parse error', 'A2M4': 'output mismatch', - 'AB8U': 'output mismatch', 'CT4Q': 'parse error', 'JTV5': 'parse error', 'K3WX': 'parse error', From 26688533f9c6744cceba6a402ebf1aec7e7da562 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 09:55:53 +0000 Subject: [PATCH 05/11] fix: correct column tracking for explicit key inline values (A2M4) The explicit key handler set pnt.cI = 1 after processing ": value", causing incorrect column info for the element marker. This broke multi-entry sequences as values of explicit keys. Fix by computing the actual column position on the value line. 
https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- src/yaml.ts | 3 ++- test/yaml-test-suite.test.ts | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/yaml.ts b/src/yaml.ts index 111905f..531b1b9 100644 --- a/src/yaml.ts +++ b/src/yaml.ts @@ -1309,7 +1309,8 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { if (hasValue) { pnt.sI += valConsumed pnt.rI++ - pnt.cI = 1 + // Set column to actual position after `: ` on the value line (1-indexed). + pnt.cI = valConsumed - consumed + 1 // Has `: value` — emit KEY now, CL on next call. pendingExplicitCL = true } else { diff --git a/test/yaml-test-suite.test.ts b/test/yaml-test-suite.test.ts index 78666bc..e378ca9 100644 --- a/test/yaml-test-suite.test.ts +++ b/test/yaml-test-suite.test.ts @@ -30,7 +30,6 @@ const SKIP: Record = { '5WE3': 'parse error', '8KB6': 'parse error', '9BXH': 'parse error', - 'A2M4': 'output mismatch', 'CT4Q': 'parse error', 'JTV5': 'parse error', 'K3WX': 'parse error', From 42e49051aefa47abd7c0256508f840e76433123e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 10:03:03 +0000 Subject: [PATCH 06/11] Fix explicit key block scalars and multiline value continuation (5WE3, JTV5) - Handle block scalar indicators (| and >) as explicit key content, parsing indented continuation lines as literal/folded block scalar text - Fix keyIndent calculation in text.check to use current line indent (colon's line) instead of previous line indent for map value continuation https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- src/yaml.ts | 65 ++++++++++++++++++++++++++++++++++-- test/yaml-test-suite.test.ts | 2 -- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/src/yaml.ts b/src/yaml.ts index 531b1b9..4d2115b 100644 --- a/src/yaml.ts +++ b/src/yaml.ts @@ -487,7 +487,9 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { } // The minimum indent for continuation lines. - let minContinuationIndent = isMapValue ? 
keyIndent + 1 : currentLineIndent + // For map values, continuation indent is based on the colon's line indent, + // not the previous line's indent (which may be a key continuation line). + let minContinuationIndent = isMapValue ? currentLineIndent + 1 : currentLineIndent let text = '' let i = 0 let totalConsumed = 0 @@ -1253,7 +1255,61 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { while (li > 0 && lex.src[li-1] !== '\n' && lex.src[li-1] !== '\r') li-- while (li < pnt.sI && lex.src[li] === ' ') { qIndent++; li++ } } - // Scan continuation lines for key. + // Count extra rows consumed (for multiline keys). + let extraRows = 0 + + // Handle block scalar keys (| or >). + let blockScalarMatch = key.match(/^([|>])([+-]?)([0-9]?)$/) + if (blockScalarMatch) { + let isFolded = blockScalarMatch[1] === '>' + let chomp = blockScalarMatch[2] || '' + let explicitIndent = blockScalarMatch[3] ? parseInt(blockScalarMatch[3]) : 0 + // Collect block scalar content lines. + let blockLines: string[] = [] + let contentIndent = 0 + while (consumed < fwd.length) { + let lineIndent = 0 + while (consumed + lineIndent < fwd.length && fwd[consumed + lineIndent] === ' ') lineIndent++ + let afterSpaces = consumed + lineIndent + // Empty line or line with only spaces. + if (afterSpaces >= fwd.length || fwd[afterSpaces] === '\n' || fwd[afterSpaces] === '\r') { + blockLines.push('') + consumed = afterSpaces + if (consumed < fwd.length && fwd[consumed] === '\r') consumed++ + if (consumed < fwd.length && fwd[consumed] === '\n') consumed++ + extraRows++ + continue + } + // Determine content indent from first non-empty line. + if (contentIndent === 0) { + contentIndent = explicitIndent > 0 ? qIndent + explicitIndent : lineIndent + } + // Line must be indented more than ? to be content. + if (lineIndent < contentIndent) break + // Collect line content. 
+ let lineEnd = afterSpaces + while (lineEnd < fwd.length && fwd[lineEnd] !== '\n' && fwd[lineEnd] !== '\r') lineEnd++ + blockLines.push(fwd.substring(consumed + contentIndent, lineEnd)) + consumed = lineEnd + if (consumed < fwd.length && fwd[consumed] === '\r') consumed++ + if (consumed < fwd.length && fwd[consumed] === '\n') consumed++ + extraRows++ + } + // Apply chomping. + // Remove trailing empty lines for non-keep. + if (chomp !== '+') { + while (blockLines.length > 0 && blockLines[blockLines.length - 1] === '') blockLines.pop() + } + if (isFolded) { + key = blockLines.join(' ') + '\n' + } else { + key = blockLines.join('\n') + '\n' + } + if (chomp === '-') { + key = key.replace(/\n$/, '') + } + } else { + // Scan continuation lines for key (plain scalar multiline). while (consumed < fwd.length) { // Skip comment lines. let lineIndent = 0 @@ -1265,6 +1321,7 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { beforeNewline = afterSpaces if (afterSpaces < fwd.length && fwd[afterSpaces] === '\r') afterSpaces++ if (afterSpaces < fwd.length && fwd[afterSpaces] === '\n') afterSpaces++ + extraRows++ consumed = afterSpaces continue } @@ -1285,10 +1342,12 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { beforeNewline = consumed if (consumed < fwd.length && fwd[consumed] === '\r') consumed++ if (consumed < fwd.length && fwd[consumed] === '\n') consumed++ + extraRows++ continue } break } + } // Now check if the next non-comment line starts with `:`. let hasValue = false let valConsumed = consumed @@ -1308,7 +1367,7 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { let src = fwd.substring(0, hasValue ? consumed : keyEnd) if (hasValue) { pnt.sI += valConsumed - pnt.rI++ + pnt.rI += 1 + extraRows // Set column to actual position after `: ` on the value line (1-indexed). pnt.cI = valConsumed - consumed + 1 // Has `: value` — emit KEY now, CL on next call. 
diff --git a/test/yaml-test-suite.test.ts b/test/yaml-test-suite.test.ts index e378ca9..81b5cf1 100644 --- a/test/yaml-test-suite.test.ts +++ b/test/yaml-test-suite.test.ts @@ -27,11 +27,9 @@ const SUITE_DIR = join(__dirname, '..', 'test', 'yaml-test-suite') // or edge cases where Jsonic's base grammar conflicts with YAML semantics. // As parser coverage improves, entries should be removed and tests should pass. const SKIP: Record = { - '5WE3': 'parse error', '8KB6': 'parse error', '9BXH': 'parse error', 'CT4Q': 'parse error', - 'JTV5': 'parse error', 'K3WX': 'parse error', } From c2d63a80e275354337b77ad0184bebdf9b517571 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 10:13:10 +0000 Subject: [PATCH 07/11] Fix flow collection edge cases for full YAML Test Suite compliance (8KB6, 9BXH, CT4Q, K3WX) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add flow collection preprocessor to handle YAML-specific features that Jsonic's core parser doesn't natively support: - Implicit null-valued keys in flow mappings: {a, b: c} → {a: ~, b: c} - Comments between key and colon in flow context - Multiline quoted scalars in flow collections - Explicit keys (?) inside flow sequences All 374 YAML Test Suite tests now pass with 0 skipped. https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- src/yaml.ts | 260 +++++++++++++++++++++++++++++++++++ test/yaml-test-suite.test.ts | 4 - 2 files changed, 260 insertions(+), 4 deletions(-) diff --git a/src/yaml.ts b/src/yaml.ts index 4d2115b..0bf8bb6 100644 --- a/src/yaml.ts +++ b/src/yaml.ts @@ -38,6 +38,258 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { // When !! is redefined, built-in type conversion is skipped. let tagHandles: Record = {} + // Preprocess flow collections for YAML-specific features. 
+ // Transforms flow collection content to be Jsonic-compatible: + // - Implicit null-valued keys in flow mappings: {a, b: c} → {a: ~, b: c} + // - Comments between key and colon: {"foo" # comment\n :bar} → {"foo" :bar} + // - Multiline quoted scalars in flow context: {"multi\n line"} → {"multi line"} + // - Explicit keys (?) inside flow collections + function preprocessFlowCollections(src: string): string { + let result = '' + let i = 0 + + while (i < src.length) { + if (src[i] === '{' || src[i] === '[') { + // Only treat as flow collection if it's at a value position: + // after start of string, after newline+indent, after ": ", after "- ", + // after ",", after "[" or "{", or preceded only by whitespace on its line. + if (isFlowCollectionStart(src, i)) { + let processed = processFlowCollection(src, i) + result += processed.text + i = processed.end + continue + } + } + result += src[i] + i++ + } + return result + } + + // Determine if { or [ at position i is a flow collection opener. + function isFlowCollectionStart(src: string, i: number): boolean { + if (i === 0) return true + // Look backward to find the preceding meaningful character. + let j = i - 1 + while (j >= 0 && (src[j] === ' ' || src[j] === '\t')) j-- + if (j < 0) return true + let prev = src[j] + // After newline: it's a flow collection if it's the first thing on the line. + if (prev === '\n' || prev === '\r') return true + // After value/element/separator indicators. + if (prev === ':' || prev === '-' || prev === ',' || + prev === '[' || prev === '{') return true + return false + } + + function processFlowCollection(src: string, start: number): { text: string, end: number } { + let open = src[start] + let close = open === '{' ? '}' : ']' + let isMap = open === '{' + let out = open + let i = start + 1 + + // Track entries in flow mappings to detect implicit null-valued keys. 
+ let entryHasColon = false + let entryParts: string[] = [] + + while (i < src.length) { + let ch = src[i] + + // Handle nested flow collections recursively. + if (ch === '{' || ch === '[') { + let nested = processFlowCollection(src, i) + if (isMap) { + entryParts.push(nested.text) + entryHasColon = true // nested structures count as values + } else { + out += nested.text + } + i = nested.end + continue + } + + // Handle quoted strings. + if (ch === '"') { + let str = '"' + i++ + while (i < src.length && src[i] !== '"') { + if (src[i] === '\\') { str += src[i]; i++ } + // Multiline double-quoted string: fold newlines into space. + if (src[i] === '\n' || src[i] === '\r') { + if (src[i] === '\r' && src[i + 1] === '\n') i++ + str += ' ' + i++ + while (i < src.length && (src[i] === ' ' || src[i] === '\t')) i++ + continue + } + str += src[i] + i++ + } + if (i < src.length) { str += '"'; i++ } + if (isMap) entryParts.push(str) + else out += str + continue + } + + if (ch === "'") { + let str = "'" + i++ + while (i < src.length) { + if (src[i] === "'" && src[i + 1] === "'") { str += "''"; i += 2; continue } + if (src[i] === "'") break + // Multiline single-quoted string: fold newlines into space. + if (src[i] === '\n' || src[i] === '\r') { + if (src[i] === '\r' && src[i + 1] === '\n') i++ + str += ' ' + i++ + while (i < src.length && (src[i] === ' ' || src[i] === '\t')) i++ + continue + } + str += src[i] + i++ + } + if (i < src.length) { str += "'"; i++ } + if (isMap) entryParts.push(str) + else out += str + continue + } + + // Handle comments: strip them in flow context. + if (ch === '#') { + // Treat as comment if preceded by whitespace or at start of line. + if (i > 0 && (src[i - 1] === ' ' || src[i - 1] === '\t' || + src[i - 1] === '\n' || src[i - 1] === '\r')) { + while (i < src.length && src[i] !== '\n' && src[i] !== '\r') i++ + if (isMap) entryParts.push(' ') + else out += ' ' + continue + } + } + + // Handle newlines in flow context: fold into space. 
+ if (ch === '\n' || ch === '\r') { + if (ch === '\r' && src[i + 1] === '\n') i++ + i++ + // Skip leading whitespace on continuation line. + while (i < src.length && (src[i] === ' ' || src[i] === '\t')) i++ + if (isMap) entryParts.push(' ') + else out += ' ' + continue + } + + // Handle colon (key-value separator in flow mapping). + if (isMap && ch === ':' && (src[i + 1] === ' ' || src[i + 1] === '\t' || + src[i + 1] === ',' || src[i + 1] === '}' || src[i + 1] === ']' || + src[i + 1] === '\n' || src[i + 1] === '\r' || src[i + 1] === undefined)) { + entryHasColon = true + entryParts.push(ch) + i++ + continue + } + + // Handle adjacent colon (no space after) as key-value separator in flow. + if (isMap && ch === ':' && i > start + 1) { + // Check if preceded by a quoted string close in the accumulated parts. + let accumulated = entryParts.join('').trimEnd() + if (accumulated.endsWith('"') || accumulated.endsWith("'")) { + entryHasColon = true + } + entryParts.push(ch) + i++ + continue + } + + // Handle comma: end of entry. + if (ch === ',') { + if (isMap) { + let entry = entryParts.join('').trim() + if (!entryHasColon && entry.length > 0) { + out += entry + ': ~,' + } else { + out += entry + ',' + } + entryParts = [] + entryHasColon = false + } else { + out += ch + } + i++ + continue + } + + // Handle closing bracket. + if (ch === close) { + if (isMap) { + let entry = entryParts.join('').trim() + if (!entryHasColon && entry.length > 0) { + out += entry + ': ~' + } else { + out += entry + } + } + out += close + i++ + return { text: out, end: i } + } + + // Handle explicit key indicator in flow context. + // Only at the start of an entry (after open bracket/brace, comma, + // or after newline with only whitespace before it). + if (ch === '?' && (src[i + 1] === ' ' || src[i + 1] === '\t')) { + let isEntryStart = false + if (!isMap) { + // Check if ? is at entry start position in sequence. 
+ let prevContent = out.trimEnd() + let lastChar = prevContent[prevContent.length - 1] + isEntryStart = lastChar === '[' || lastChar === ',' + } else { + let accumulated = entryParts.join('').trim() + isEntryStart = accumulated.length === 0 + } + if (isEntryStart && !isMap) { + // Convert [? key : val] → [{key: val}] + out += '{' + let inner = '' + i += 2 + while (i < src.length && src[i] !== ',' && src[i] !== close) { + if (src[i] === '\n' || src[i] === '\r') { + if (src[i] === '\r' && src[i + 1] === '\n') i++ + inner += ' ' + i++ + while (i < src.length && (src[i] === ' ' || src[i] === '\t')) i++ + continue + } + if (src[i] === '#') { + while (i < src.length && src[i] !== '\n' && src[i] !== '\r') i++ + continue + } + inner += src[i] + i++ + } + out += inner.trim() + '}' + continue + } else if (isEntryStart && isMap) { + // In flow mapping, ? is an explicit key indicator — skip it. + i += 2 + continue + } + } + + // Regular character. + if (isMap) entryParts.push(ch) + else out += ch + i++ + } + + // Unclosed collection — return what we have. + if (isMap) { + out += entryParts.join('') + } + out += close + return { text: out, end: i } + } + jsonic.options({ fixed: { token: { @@ -787,6 +1039,14 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { if (docStripped && /^(---|\.\.\.)(\s|$)/.test(src)) { src = '' } + // Preprocess flow collections for YAML-specific features + // that Jsonic's core parser doesn't handle natively: + // - Implicit null-valued keys in flow mappings: {a, b: c} + // - Comments between key and colon: {"foo" # comment\n :bar} + // - Multiline plain/quoted scalars in flow context + // - Explicit keys (?) 
inside flow collections + src = preprocessFlowCollections(src) + lex.src = src lex.pnt.len = src.length // If source is empty/whitespace/comments-only after preprocessing, diff --git a/test/yaml-test-suite.test.ts b/test/yaml-test-suite.test.ts index 81b5cf1..bcfcd8a 100644 --- a/test/yaml-test-suite.test.ts +++ b/test/yaml-test-suite.test.ts @@ -27,10 +27,6 @@ const SUITE_DIR = join(__dirname, '..', 'test', 'yaml-test-suite') // or edge cases where Jsonic's base grammar conflicts with YAML semantics. // As parser coverage improves, entries should be removed and tests should pass. const SKIP: Record = { - '8KB6': 'parse error', - '9BXH': 'parse error', - 'CT4Q': 'parse error', - 'K3WX': 'parse error', } From e17e50f2a07d3c5d0add2d199d1865cf79891eef Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 14:24:00 +0000 Subject: [PATCH 08/11] Add Go implementation of YAML Jsonic grammar plugin Implements a Go version of the @jsonic.dev/yaml parser as a Jsonic plugin, following the same architecture as the CSV Go plugin. 
Includes: - yaml.go: Public API (Parse, MakeJsonic), helper functions, YAML value maps - plugin.go: Yaml plugin with custom matcher, TextCheck for block scalars/ tags/plain scalars, anchor/alias handling, quoted strings, indentation - grammar.go: YAML-specific grammar rules for block mappings, sequences, indent-based nesting, element markers, merge keys - yaml_test.go: 93 tests covering block mappings, sequences, scalar types, quoted strings, block scalars, flow collections, comments, anchors/aliases, merge keys, documents, tags, complex keys, directives, indentation, multiline scalars, CRLF, and real-world patterns https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- go/go.mod | 5 + go/go.sum | 2 + go/grammar.go | 561 ++++++++++++++ go/plugin.go | 1967 +++++++++++++++++++++++++++++++++++++++++++++++ go/yaml.go | 204 +++++ go/yaml_test.go | 630 +++++++++++++++ 6 files changed, 3369 insertions(+) create mode 100644 go/go.mod create mode 100644 go/go.sum create mode 100644 go/grammar.go create mode 100644 go/plugin.go create mode 100644 go/yaml.go create mode 100644 go/yaml_test.go diff --git a/go/go.mod b/go/go.mod new file mode 100644 index 0000000..375e3d8 --- /dev/null +++ b/go/go.mod @@ -0,0 +1,5 @@ +module github.com/jsonicjs/yaml/go + +go 1.24.7 + +require github.com/jsonicjs/jsonic/go v0.1.4 diff --git a/go/go.sum b/go/go.sum new file mode 100644 index 0000000..dc99d17 --- /dev/null +++ b/go/go.sum @@ -0,0 +1,2 @@ +github.com/jsonicjs/jsonic/go v0.1.4 h1:V1KEzmg/jIwk25+JYj8ig1+B7190rHmH8WqZbT7XlgA= +github.com/jsonicjs/jsonic/go v0.1.4/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg= diff --git a/go/grammar.go b/go/grammar.go new file mode 100644 index 0000000..50f0c6e --- /dev/null +++ b/go/grammar.go @@ -0,0 +1,561 @@ +package yaml + +import ( + jsonic "github.com/jsonicjs/jsonic/go" +) + +// configureGrammarRules sets up YAML-specific grammar rules. 
+func configureGrammarRules(j *jsonic.Jsonic, IN, EL jsonic.Tin, KEY []jsonic.Tin, + CL, ZZ, CA, CS, CB, TX, ST, VL, NR jsonic.Tin, + anchors map[string]any, pendingAnchors *[]anchorInfo) { + + // ===== val rule ===== + j.Rule("val", func(rs *jsonic.RuleSpec) { + rs.PrependOpen( + // Indent followed by content: push indent rule. + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + parentIn, hasParentIn := r.K["yamlIn"] + listIn, hasListIn := r.K["yamlListIn"] + if hasListIn && listIn != nil { + if listInVal, ok := toInt(listIn); ok { + if t0Val, ok := toInt(ctx.T0.Val); ok { + if t0Val <= listInVal { + return false + } + } + } + } + if !hasParentIn || parentIn == nil { + return true + } + if parentInVal, ok := toInt(parentIn); ok { + if t0Val, ok := toInt(ctx.T0.Val); ok { + return t0Val > parentInVal + } + } + return true + }, + P: "indent", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if v, ok := toInt(r.O0.Val); ok { + r.N["in"] = v + } + }, + }, + + // Same indent followed by element marker: list value at map level. + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}, {EL}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + parentIn, hasParentIn := r.K["yamlIn"] + if !hasParentIn || parentIn == nil { + return false + } + if parentInVal, ok := toInt(parentIn); ok { + if t0Val, ok := toInt(ctx.T0.Val); ok { + return t0Val == parentInVal + } + } + return false + }, + P: "yamlBlockList", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if v, ok := toInt(r.O0.Val); ok { + r.N["in"] = v + } + }, + }, + + // End of input means empty value. + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{ZZ}}, + B: 1, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = nil + }, + }, + + // Same or lesser indent: empty value — backtrack. + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + B: 1, + U: map[string]any{"yamlEmpty": true}, + }, + + // This value is a list. 
+ &jsonic.AltSpec{ + S: [][]jsonic.Tin{{EL}}, + P: "yamlBlockList", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.N["in"] = r.O0.CI - 1 + }, + }, + ) + + // After open: claim pending anchors. + rs.AddAO(func(r *jsonic.Rule, ctx *jsonic.Context) { + if len(*pendingAnchors) > 0 { + anchorsCopy := make([]anchorInfo, len(*pendingAnchors)) + copy(anchorsCopy, *pendingAnchors) + r.U["yamlAnchors"] = anchorsCopy + r.U["yamlAnchorOpenNode"] = r.Node + *pendingAnchors = (*pendingAnchors)[:0] + } + }) + + // Before close: follow replacement chain from child to get final node. + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + // Follow the replacement chain from the child to find the + // final sibling's Node (e.g., yamlBlockList → yamlBlockElem chain). + child := r.Child + if child != nil && child != jsonic.NoRule { + final := child + for final.Next != nil && final.Next != jsonic.NoRule && + final.Next.Prev == final { + final = final.Next + } + if final != child && !jsonic.IsUndefined(final.Node) { + r.Node = final.Node + } + } + }) + + // Before close: handle empty values. + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if _, ok := r.U["yamlEmpty"]; ok { + r.Node = jsonic.Undefined + } + }) + + // Close on indent tokens. + rs.PrependClose( + &jsonic.AltSpec{S: [][]jsonic.Tin{{IN}}, B: 1}, + ) + + // After close: resolve aliases and record anchors. + rs.AddAC(func(r *jsonic.Rule, ctx *jsonic.Context) { + // Resolve alias markers. + if m, ok := r.Node.(map[string]any); ok { + if alias, ok := m["__yamlAlias"].(string); ok { + val, exists := anchors[alias] + if exists { + switch v := val.(type) { + case map[string]any, []any: + r.Node = deepCopy(v) + default: + r.Node = val + } + } + } + } + + // Record anchors. 
+ if anchorList, ok := r.U["yamlAnchors"]; ok { + anchorsSlice, ok := anchorList.([]anchorInfo) + if ok { + for _, anchor := range anchorsSlice { + if anchor.inline { + openNode := r.U["yamlAnchorOpenNode"] + if openNode != nil { + switch openNode.(type) { + case map[string]any, []any: + // Don't overwrite with final compound value. + continue + } + } + } + val := r.Node + switch v := val.(type) { + case map[string]any, []any: + val = deepCopy(v) + } + anchors[anchor.name] = val + } + } + } + }) + }) + + // ===== indent rule ===== + j.Rule("indent", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.Open = []*jsonic.AltSpec{ + // Key pair → map. + {S: [][]jsonic.Tin{KEY, {CL}}, P: "map", B: 2}, + // Element → list. + {S: [][]jsonic.Tin{{EL}}, P: "list"}, + // Plain value after indent. + {S: [][]jsonic.Tin{KEY}, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if r.O0.Tin == ST || r.O0.Tin == TX { + r.Node = r.O0.Val + } else { + r.Node = r.O0.Src + } + }, + }, + } + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if !jsonic.IsUndefined(r.Child.Node) { + r.Node = r.Child.Node + } + }) + }) + + // ===== yamlBlockList rule ===== + j.Rule("yamlBlockList", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = make([]any, 0) + r.K["yamlBlockArr"] = r.Node + r.K["yamlListIn"] = r.N["in"] + }) + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{KEY, {CL}}, P: "yamlElemMap", B: 2, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.K["yamlMapIn"] = r.N["in"] + 2 + }, + }, + {P: "val"}, + } + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + val := r.Child.Node + if jsonic.IsUndefined(val) { + val = nil + } + if arr, ok := r.K["yamlBlockArr"].([]any); ok { + arr = append(arr, val) + r.K["yamlBlockArr"] = arr + r.Node = arr + } + }) + rs.Close = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{IN}, {EL}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v == r.N["in"] + 
} + return false + }, + R: "yamlBlockElem", + }, + {S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v <= r.N["in"] + } + return false + }, + B: 1, + }, + {S: [][]jsonic.Tin{{EL}}, R: "yamlBlockElem"}, + {S: [][]jsonic.Tin{{ZZ}}, B: 1}, + } + }) + + // ===== yamlBlockElem rule ===== + j.Rule("yamlBlockElem", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = r.K["yamlBlockArr"] + }) + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{KEY, {CL}}, P: "yamlElemMap", B: 2, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.K["yamlMapIn"] = r.N["in"] + 2 + }, + }, + {P: "val"}, + } + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + val := r.Child.Node + if jsonic.IsUndefined(val) { + val = nil + } + if arr, ok := r.K["yamlBlockArr"].([]any); ok { + arr = append(arr, val) + r.K["yamlBlockArr"] = arr + r.Node = arr + } + }) + rs.Close = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{IN}, {EL}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v == r.N["in"] + } + return false + }, + R: "yamlBlockElem", + }, + {S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v <= r.N["in"] + } + return false + }, + B: 1, + }, + {S: [][]jsonic.Tin{{EL}}, R: "yamlBlockElem"}, + {S: [][]jsonic.Tin{{ZZ}}, B: 1}, + } + }) + + // ===== list rule amendments ===== + j.Rule("list", func(rs *jsonic.RuleSpec) { + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.K["yamlListIn"] = r.N["in"] + }) + rs.PrependClose( + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v <= r.N["in"] + } + return false + }, + B: 1, + }, + ) + }) + + // ===== map rule amendments ===== + j.Rule("map", func(rs *jsonic.RuleSpec) { + rs.AddBO(func(r *jsonic.Rule, ctx 
*jsonic.Context) { + if _, ok := r.N["in"]; !ok { + r.N["in"] = 0 + } + r.K["yamlIn"] = r.N["in"] + }) + rs.PrependOpen( + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(r.O0.Val); ok { + return v == r.N["in"] + } + return false + }, + R: "pair", + }, + ) + // Handle merge keys. + rs.AddAC(func(r *jsonic.Rule, ctx *jsonic.Context) { + m, ok := r.Node.(map[string]any) + if !ok { + return + } + mergeVal, hasMerge := m["<<"] + if !hasMerge { + return + } + delete(m, "<<") + switch mv := mergeVal.(type) { + case []any: + for _, item := range mv { + if mm, ok := item.(map[string]any); ok { + for k, v := range mm { + if _, exists := m[k]; !exists { + m[k] = v + } + } + } + } + case map[string]any: + for k, v := range mv { + if _, exists := m[k]; !exists { + m[k] = v + } + } + } + }) + rs.PrependClose( + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v < r.N["in"] + } + return false + }, + B: 1, + }, + ) + }) + + // ===== pair rule amendments ===== + j.Rule("pair", func(rs *jsonic.RuleSpec) { + rs.PrependOpen( + &jsonic.AltSpec{S: [][]jsonic.Tin{{ZZ}}, B: 1}, + ) + rs.PrependClose( + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v == r.N["in"] + } + return false + }, + R: "pair", + }, + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v < r.N["in"] + } + return false + }, + B: 1, + }, + ) + }) + + // ===== yamlElemMap rule ===== + j.Rule("yamlElemMap", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = make(map[string]any) + }) + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{KEY, {CL}}, P: "val", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + 
r.U["key"] = extractKey(r.O0, anchors) + }, + }, + } + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if key := r.U["key"]; key != nil { + if m, ok := r.Node.(map[string]any); ok { + val := r.Child.Node + if jsonic.IsUndefined(val) { + val = nil + } + m[formatKey(key)] = val + } + } + }) + rs.Close = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + if mapIn, ok := toInt(r.K["yamlMapIn"]); ok { + return v == mapIn + } + } + return false + }, + R: "yamlElemPair", + }, + {S: [][]jsonic.Tin{{IN}}, B: 1}, + {S: [][]jsonic.Tin{{CA}}, B: 1}, + {S: [][]jsonic.Tin{{CS}}, B: 1}, + {S: [][]jsonic.Tin{{CB}}, B: 1}, + {S: [][]jsonic.Tin{{ZZ}}}, + } + }) + + // ===== yamlElemPair rule ===== + j.Rule("yamlElemPair", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{KEY, {CL}}, P: "val", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.U["key"] = extractKey(r.O0, anchors) + }, + }, + } + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if key := r.U["key"]; key != nil { + if m, ok := r.Node.(map[string]any); ok { + val := r.Child.Node + if jsonic.IsUndefined(val) { + val = nil + } + m[formatKey(key)] = val + } + } + }) + rs.Close = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + if mapIn, ok := toInt(r.K["yamlMapIn"]); ok { + return v == mapIn + } + } + return false + }, + R: "yamlElemPair", + }, + {S: [][]jsonic.Tin{{IN}}, B: 1}, + {S: [][]jsonic.Tin{{CA}}, B: 1}, + {S: [][]jsonic.Tin{{CS}}, B: 1}, + {S: [][]jsonic.Tin{{CB}}, B: 1}, + {S: [][]jsonic.Tin{{ZZ}}}, + } + }) + + // ===== elem rule amendments ===== + j.Rule("elem", func(rs *jsonic.RuleSpec) { + rs.PrependOpen( + &jsonic.AltSpec{S: [][]jsonic.Tin{KEY, {CL}}, P: "yamlElemMap", B: 2, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.K["yamlMapIn"] = r.N["in"] + 2 + }, + }, + ) 
+ rs.PrependClose( + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}, {EL}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v == r.N["in"] + } + return false + }, + R: "elem", + }, + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v == r.N["in"] + } + return false + }, + B: 1, + }, + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v < r.N["in"] + } + return false + }, + B: 1, + }, + &jsonic.AltSpec{S: [][]jsonic.Tin{{EL}}, R: "elem"}, + ) + }) +} + +// toInt converts an any value to int. +func toInt(v any) (int, bool) { + switch n := v.(type) { + case int: + return n, true + case float64: + return int(n), true + case int64: + return int(n), true + default: + return 0, false + } +} diff --git a/go/plugin.go b/go/plugin.go new file mode 100644 index 0000000..ddef5bc --- /dev/null +++ b/go/plugin.go @@ -0,0 +1,1967 @@ +package yaml + +import ( + "regexp" + "strconv" + "strings" + + jsonic "github.com/jsonicjs/jsonic/go" +) + +// Yaml is a jsonic plugin that adds YAML parsing support. +func Yaml(j *jsonic.Jsonic, _ map[string]any) { + TX := j.Token("#TX") + NR := j.Token("#NR") + ST := j.Token("#ST") + VL := j.Token("#VL") + CL := j.Token("#CL") + ZZ := j.Token("#ZZ") + CA := j.Token("#CA") + CS := j.Token("#CS") + CB := j.Token("#CB") + + // Register custom tokens. + IN := j.Token("#IN") // Indent token + EL := j.Token("#EL") // Element marker (- ) + + KEY := []jsonic.Tin{TX, NR, ST, VL} + + // Shared state for the plugin instance. + anchors := make(map[string]any) + var pendingAnchors []anchorInfo + pendingExplicitCL := false + var pendingTokens []*jsonic.Token + tagHandles := make(map[string]string) + + // Remove colon as a fixed token — YAML uses ": " (colon-space). 
+ cfg := j.Config() + delete(cfg.FixedTokens, ":") + cfg.SortFixedTokens() + + // Add colon as an ender char so text tokens stop at ":". + if cfg.EnderChars == nil { + cfg.EnderChars = make(map[rune]bool) + } + cfg.EnderChars[':'] = true + + // ===== TextCheck: handles block scalars, !!tags, and plain scalars ===== + cfg.TextCheck = func(lex *jsonic.Lex) *jsonic.LexCheckResult { + pnt := lex.Cursor() + src := lex.Src + fwd := src[pnt.SI:] + if len(fwd) == 0 { + return nil + } + ch := fwd[0] + + // Block scalar: | or > + if ch == '|' || ch == '>' { + return handleBlockScalar(lex, pnt, src, fwd, ch) + } + + // !!type tags in text check context + if ch == '!' && len(fwd) > 1 && fwd[1] == '!' { + return handleTagInTextCheck(lex, pnt, fwd, tagHandles) + } + + // Skip special chars that should be handled by other matchers. + if ch == '{' || ch == '}' || ch == '[' || ch == ']' || + ch == ',' || ch == '#' || ch == '\n' || ch == '\r' || + ch == '"' || ch == '\'' || ch == '*' || ch == '&' || ch == '!' { + return nil + } + + // Colon followed by space/tab/newline/eof is a separator, not text. + if ch == ':' && (len(fwd) < 2 || fwd[1] == ' ' || fwd[1] == '\t' || fwd[1] == '\n' || fwd[1] == '\r') { + return nil + } + + // Plain scalar — scan to end of line, handling multiline continuation. + return handlePlainScalar(lex, pnt, src, fwd) + } + + // ===== Custom YAML matcher (priority 500000 — before fixed tokens) ===== + srcCleaned := false + + j.AddMatcher("yaml", 500000, func(lex *jsonic.Lex) *jsonic.Token { + pnt := lex.Cursor() + src := lex.Src + + // First call: clean source (strip directives, initial ---). + if !srcCleaned { + srcCleaned = true + cleaned := cleanSource(src, tagHandles) + if cleaned != src { + lex.Src = cleaned + pnt.Len = len(cleaned) + } + } + + if pnt.SI >= pnt.Len { + return nil + } + + // Emit pending tokens (from explicit key handling). 
+ if len(pendingTokens) > 0 { + tkn := pendingTokens[0] + pendingTokens = pendingTokens[1:] + return tkn + } + + // Emit pending explicit CL token. + if pendingExplicitCL { + pendingExplicitCL = false + tkn := lex.Token("#CL", CL, 1, ": ") + return tkn + } + + fwd := lex.Src[pnt.SI:] + if len(fwd) == 0 { + return nil + } + + // Process YAML features in a loop to handle chaining. + for { + if pnt.SI >= pnt.Len { + return nil + } + fwd = lex.Src[pnt.SI:] + if len(fwd) == 0 { + return nil + } + + // Alias: *name + if fwd[0] == '*' { + nameEnd := 1 + for nameEnd < len(fwd) && fwd[nameEnd] != ' ' && fwd[nameEnd] != '\t' && + fwd[nameEnd] != '\n' && fwd[nameEnd] != '\r' && fwd[nameEnd] != ',' && + fwd[nameEnd] != '{' && fwd[nameEnd] != '}' && fwd[nameEnd] != '[' && + fwd[nameEnd] != ']' { + nameEnd++ + } + aliasName := fwd[1:nameEnd] + if val, ok := anchors[aliasName]; ok { + var tkn *jsonic.Token + switch v := val.(type) { + case string: + tkn = lex.Token("#TX", TX, v, fwd[:nameEnd]) + case float64: + tkn = lex.Token("#NR", NR, v, fwd[:nameEnd]) + case bool: + tkn = lex.Token("#VL", VL, v, fwd[:nameEnd]) + case nil: + tkn = lex.Token("#VL", VL, nil, fwd[:nameEnd]) + default: + // Complex value — use alias marker for later resolution. + tkn = lex.Token("#VL", VL, map[string]any{"__yamlAlias": aliasName}, fwd[:nameEnd]) + } + pnt.SI += nameEnd + pnt.CI += nameEnd + return tkn + } + // Unknown alias — return as marker. 
+ tkn := lex.Token("#VL", VL, map[string]any{"__yamlAlias": aliasName}, fwd[:nameEnd]) + pnt.SI += nameEnd + pnt.CI += nameEnd + return tkn + } + + // Anchor: &name + if fwd[0] == '&' { + nameEnd := 1 + for nameEnd < len(fwd) && fwd[nameEnd] != ' ' && fwd[nameEnd] != '\t' && + fwd[nameEnd] != '\n' && fwd[nameEnd] != '\r' && fwd[nameEnd] != ',' && + fwd[nameEnd] != '{' && fwd[nameEnd] != '}' && fwd[nameEnd] != '[' && + fwd[nameEnd] != ']' { + nameEnd++ + } + anchorName := fwd[1:nameEnd] + anchorInline := true + + // Check if anchor is standalone (nothing meaningful after it on the line). + afterAnchor := nameEnd + for afterAnchor < len(fwd) && (fwd[afterAnchor] == ' ' || fwd[afterAnchor] == '\t') { + afterAnchor++ + } + isStandalone := afterAnchor >= len(fwd) || fwd[afterAnchor] == '\n' || + fwd[afterAnchor] == '\r' || fwd[afterAnchor] == '#' + + if isStandalone { + anchorInline = false + } + + // Try to capture inline scalar value for the anchor. + if anchorInline && afterAnchor < len(fwd) { + peek := fwd[afterAnchor:] + var scalarVal any + pch := byte(0) + if len(peek) > 0 { + pch = peek[0] + } + if pch == '"' { + ei := 1 + for ei < len(peek) && peek[ei] != '"' { + if peek[ei] == '\\' { + ei++ + } + ei++ + } + raw := peek[1:ei] + raw = strings.ReplaceAll(raw, "\\n", "\n") + raw = strings.ReplaceAll(raw, "\\t", "\t") + raw = strings.ReplaceAll(raw, "\\\\", "\\") + raw = strings.ReplaceAll(raw, `\"`, `"`) + scalarVal = raw + } else if pch == '\'' { + ei := 1 + for ei < len(peek) && peek[ei] != '\'' { + if ei+1 < len(peek) && peek[ei] == '\'' && peek[ei+1] == '\'' { + ei++ + } + ei++ + } + raw := peek[1:ei] + raw = strings.ReplaceAll(raw, "''", "'") + scalarVal = raw + } else if pch != 0 && pch != '{' && pch != '[' && pch != '\n' && pch != '\r' { + ei := 0 + for ei < len(peek) && peek[ei] != '\n' && peek[ei] != '\r' && + peek[ei] != ',' && peek[ei] != '}' && peek[ei] != ']' { + if peek[ei] == ':' && (ei+1 >= len(peek) || peek[ei+1] == ' ' || + peek[ei+1] == '\t' || 
peek[ei+1] == '\n' || peek[ei+1] == '\r') { + break + } + if peek[ei] == ' ' && ei+1 < len(peek) && peek[ei+1] == '#' { + break + } + ei++ + } + raw := strings.TrimRight(peek[:ei], " \t") + if len(raw) > 0 { + scalarVal = raw + } + } + if scalarVal != nil { + anchors[anchorName] = scalarVal + } + } + + pendingAnchors = append(pendingAnchors, anchorInfo{name: anchorName, inline: anchorInline}) + + // Consume the anchor name (and trailing spaces, but NOT the newline). + skip := nameEnd + for skip < len(fwd) && (fwd[skip] == ' ' || fwd[skip] == '\t') { + skip++ + } + // Skip comments after anchor. + if skip < len(fwd) && fwd[skip] == '#' { + for skip < len(fwd) && fwd[skip] != '\n' && fwd[skip] != '\r' { + skip++ + } + } + pnt.SI += skip + pnt.CI += skip + + continue // Re-loop to process what follows the anchor + } + + // Directive lines (%YAML, %TAG, etc.): skip to --- + if fwd[0] == '%' { + pos := 0 + for pos < len(fwd) { + if isDocMarker(fwd, pos) { + break + } + for pos < len(fwd) && fwd[pos] != '\n' && fwd[pos] != '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\n' { + pos++ + } + pnt.RI++ + } + pnt.SI += pos + pnt.CI = 0 + continue + } + + // Non-specific tag: ! value + if fwd[0] == '!' && len(fwd) > 1 && fwd[1] != '!' { + if fwd[1] == ' ' { + // Non-specific tag: ! value + valStart := 2 + valEnd := valStart + for valEnd < len(fwd) && fwd[valEnd] != '\n' && fwd[valEnd] != '\r' { + valEnd++ + } + rawVal := trimRight(fwd[valStart:valEnd]) + tkn := lex.Token("#TX", TX, rawVal, fwd[:valEnd]) + pnt.SI += valEnd + pnt.CI += valEnd + return tkn + } + // Local tag: !name value — skip the tag. + tagEnd := 1 + for tagEnd < len(fwd) && fwd[tagEnd] != ' ' && fwd[tagEnd] != '\n' && fwd[tagEnd] != '\r' { + tagEnd++ + } + if tagEnd < len(fwd) && fwd[tagEnd] == ' ' { + tagEnd++ + } + pnt.SI += tagEnd + pnt.CI += tagEnd + // If tag is standalone, consume newline + spaces. 
+ if pnt.SI < pnt.Len && (lex.Src[pnt.SI] == '\n' || lex.Src[pnt.SI] == '\r') { + tagStandalone := true + tagLineIndent := 0 + tbi := pnt.SI - tagEnd - 1 + for tbi >= 0 && lex.Src[tbi] != '\n' && lex.Src[tbi] != '\r' { + if lex.Src[tbi] != ' ' && lex.Src[tbi] != '\t' { + tagStandalone = false + break + } + tagLineIndent++ + tbi-- + } + _ = tagLineIndent + if tagStandalone { + nl := pnt.SI + if nl < pnt.Len && lex.Src[nl] == '\r' { + nl++ + } + if nl < pnt.Len && lex.Src[nl] == '\n' { + nl++ + } + spaces := 0 + for nl+spaces < pnt.Len && lex.Src[nl+spaces] == ' ' { + spaces++ + } + pnt.SI = nl + spaces + pnt.CI = spaces + pnt.RI++ + } + } + continue + } + + // !!seq, !!map, !!omap, etc. structural tags — skip them. + structTagRe := regexp.MustCompile(`^!!(seq|map|omap|set|pairs|binary|ordered|python/\S*)`) + if fwd[0] == '!' && len(fwd) > 1 && fwd[1] == '!' && structTagRe.MatchString(fwd) { + skip := 2 + for skip < len(fwd) && fwd[skip] != ' ' && fwd[skip] != '\n' { + skip++ + } + for skip < len(fwd) && fwd[skip] == ' ' { + skip++ + } + // If standalone, consume newline. + tagIndent := 0 + tbi := pnt.SI - 1 + standalone := true + for tbi >= 0 && lex.Src[tbi] != '\n' && lex.Src[tbi] != '\r' { + if lex.Src[tbi] != ' ' && lex.Src[tbi] != '\t' { + standalone = false + break + } + tagIndent++ + tbi-- + } + if standalone && skip < len(fwd) && (fwd[skip] == '\n' || fwd[skip] == '\r') { + nl := skip + if nl < len(fwd) && fwd[nl] == '\r' { + nl++ + } + if nl < len(fwd) && fwd[nl] == '\n' { + nl++ + } + spaces := 0 + for nl+spaces < len(fwd) && fwd[nl+spaces] == ' ' { + spaces++ + } + if spaces >= tagIndent { + skip = nl + spaces + pnt.SI += skip + pnt.CI = spaces + pnt.RI++ + continue + } + } + pnt.SI += skip + pnt.CI += skip + continue + } + + // !!type tags (!!str, !!int, !!float, !!bool, !!null). + if fwd[0] == '!' && len(fwd) > 1 && fwd[1] == '!' { + return handleTypeTag(lex, pnt, fwd, tagHandles, &pendingAnchors, anchors, TX, NR, VL, ST) + } + + // Explicit key: ? 
key + if fwd[0] == '?' && (len(fwd) < 2 || fwd[1] == ' ' || fwd[1] == '\t' || + fwd[1] == '\n' || fwd[1] == '\r') { + return handleExplicitKey(lex, pnt, fwd, &pendingExplicitCL, &pendingTokens, TX, CL, VL) + } + + // Document markers: --- and ... + if isDocMarker(fwd, 0) { + return handleDocMarker(lex, pnt, fwd, IN, &pendingAnchors, anchors, TX) + } + + // Re-check patterns after --- fall-through. + if fwd[0] == '%' { + pos := 0 + for pos < len(fwd) { + if isDocMarker(fwd, pos) { + break + } + for pos < len(fwd) && fwd[pos] != '\n' && fwd[pos] != '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\n' { + pos++ + } + pnt.RI++ + } + pnt.SI += pos + pnt.CI = 0 + continue + } + + // Non-specific tag after ---. + if fwd[0] == '!' && len(fwd) > 1 && fwd[1] == ' ' { + valStart := 2 + valEnd := valStart + for valEnd < len(fwd) && fwd[valEnd] != '\n' && fwd[valEnd] != '\r' { + valEnd++ + } + rawVal := trimRight(fwd[valStart:valEnd]) + tkn := lex.Token("#TX", TX, rawVal, fwd[:valEnd]) + pnt.SI += valEnd + pnt.CI += valEnd + return tkn + } + + // Anchor after --- fall-through. + if fwd[0] == '&' { + continue // Will be handled at top of loop + } + + // YAML double-quoted string. + if fwd[0] == '"' { + return handleDoubleQuotedString(lex, pnt, fwd, ST) + } + + // YAML single-quoted string. + if fwd[0] == '\'' { + return handleSingleQuotedString(lex, pnt, fwd, ST) + } + + // Plain scalars starting with digits that contain colons (e.g. 20:03:20). 
+ if fwd[0] >= '0' && fwd[0] <= '9' { + if tkn := handleNumericColon(lex, pnt, fwd, TX); tkn != nil { + return tkn + } + } + + // Element marker: - (followed by space/tab/newline/eof) + if fwd[0] == '-' && (len(fwd) < 2 || fwd[1] == ' ' || fwd[1] == '\t' || + fwd[1] == '\n' || fwd[1] == '\r') { + tkn := lex.Token("#EL", EL, nil, "- ") + pnt.SI++ + pnt.CI++ + if len(fwd) > 1 && (fwd[1] == ' ' || fwd[1] == '\t') { + pnt.SI++ + pnt.CI++ + } + return tkn + } + + // YAML colon: ": ", ":\t", ":\n", ":" at end. + isFlowColon := false + if fwd[0] == ':' && len(fwd) > 1 && fwd[1] != ' ' && fwd[1] != '\t' && + fwd[1] != '\n' && fwd[1] != '\r' { + prevI := pnt.SI - 1 + for prevI >= 0 && (lex.Src[prevI] == ' ' || lex.Src[prevI] == '\t' || + lex.Src[prevI] == '\n' || lex.Src[prevI] == '\r') { + prevI-- + } + if prevI >= 0 && (lex.Src[prevI] == '"' || lex.Src[prevI] == '\'') { + isFlowColon = true + } + } + if fwd[0] == ':' && (len(fwd) < 2 || fwd[1] == ' ' || fwd[1] == '\t' || + fwd[1] == '\n' || fwd[1] == '\r' || isFlowColon) { + tkn := lex.Token("#CL", CL, 1, ": ") + pnt.SI++ + if len(fwd) > 1 && (fwd[1] == ' ' || fwd[1] == '\t') { + pnt.CI += 2 + } else if len(fwd) > 1 && (fwd[1] == '\n' || fwd[1] == '\r') { + // Don't consume newline. + } else { + pnt.CI++ + } + return tkn + } + + // Newline handling — YAML indentation is significant. + if fwd[0] == '\n' || fwd[0] == '\r' { + // Check if we're inside a flow collection. + inFlow := 0 + for fi := 0; fi < pnt.SI; fi++ { + fc := lex.Src[fi] + if fc == '{' || fc == '[' { + inFlow++ + } else if fc == '}' || fc == ']' { + if inFlow > 0 { + inFlow-- + } + } else if fc == '"' { + fi++ + for fi < pnt.SI && lex.Src[fi] != '"' { + if lex.Src[fi] == '\\' { + fi++ + } + fi++ + } + } else if fc == '\'' { + fi++ + for fi < pnt.SI && lex.Src[fi] != '\'' { + if fi+1 < pnt.SI && lex.Src[fi] == '\'' && lex.Src[fi+1] == '\'' { + fi++ + } + fi++ + } + } + } + if inFlow > 0 { + // Inside flow collection — consume whitespace. 
+ pos := 0 + for pos < len(fwd) && (fwd[pos] == '\n' || fwd[pos] == '\r' || + fwd[pos] == ' ' || fwd[pos] == '\t') { + pos++ + } + if pos < len(fwd) && fwd[pos] == '#' { + for pos < len(fwd) && fwd[pos] != '\n' && fwd[pos] != '\r' { + pos++ + } + } + pnt.SI += pos + pnt.CI = 0 + continue + } + + // Block context newline — emit #IN with indent level. + pos := 0 + spaces := 0 + rows := 0 + for pos < len(fwd) { + if fwd[pos] == '\r' && pos+1 < len(fwd) && fwd[pos+1] == '\n' { + pos += 2 + rows++ + } else if fwd[pos] == '\n' { + pos++ + rows++ + } else { + break + } + spaces = 0 + for pos < len(fwd) && fwd[pos] == ' ' { + pos++ + spaces++ + } + // Comment-only line — skip. + if pos < len(fwd) && fwd[pos] == '#' { + for pos < len(fwd) && fwd[pos] != '\n' && fwd[pos] != '\r' { + pos++ + } + continue + } + // Tab-only line — skip. + if pos < len(fwd) && fwd[pos] == '\t' { + tp := pos + for tp < len(fwd) && (fwd[tp] == ' ' || fwd[tp] == '\t') { + tp++ + } + if tp >= len(fwd) || fwd[tp] == '\n' || fwd[tp] == '\r' { + pos = tp + continue + } + } + // Anchor-only line. + if pos < len(fwd) && fwd[pos] == '&' { + ae := pos + 1 + for ae < len(fwd) && fwd[ae] != ' ' && fwd[ae] != '\t' && + fwd[ae] != '\n' && fwd[ae] != '\r' { + ae++ + } + afterAnchor := ae + for afterAnchor < len(fwd) && (fwd[afterAnchor] == ' ' || fwd[afterAnchor] == '\t') { + afterAnchor++ + } + if afterAnchor >= len(fwd) || fwd[afterAnchor] == '\n' || + fwd[afterAnchor] == '\r' || fwd[afterAnchor] == '#' { + pendingAnchors = append(pendingAnchors, anchorInfo{name: fwd[pos+1 : ae], inline: false}) + for afterAnchor < len(fwd) && fwd[afterAnchor] != '\n' && fwd[afterAnchor] != '\r' { + afterAnchor++ + } + pos = afterAnchor + continue + } + } + } + + // Consumed everything — emit ZZ. + if pos >= len(fwd) { + pnt.SI += pos + pnt.RI += rows + pnt.CI = spaces + 1 + tkn := lex.Token("#ZZ", ZZ, jsonic.Undefined, "") + return tkn + } + + // Emit #IN with indent level. 
+ tkn := lex.Token("#IN", IN, spaces, fwd[:pos]) + pnt.SI += pos + pnt.RI += rows + pnt.CI = spaces + 1 + return tkn + } + + break // End of yamlMatchLoop + } + + return nil + }) + + // ===== Grammar rules ===== + configureGrammarRules(j, IN, EL, KEY, CL, ZZ, CA, CS, CB, TX, ST, VL, NR, + anchors, &pendingAnchors) +} + +// cleanSource strips YAML directives and initial document markers from source. +func cleanSource(src string, tagHandles map[string]string) string { + if len(src) == 0 { + return src + } + + // Remove leading directive block. + if src[0] == '%' { + dIdx := strings.Index(src, "\n---") + if dIdx >= 0 { + dirBlock := src[:dIdx] + for _, dl := range strings.Split(dirBlock, "\n") { + tagMatch := regexp.MustCompile(`^%TAG\s+(\S+)\s+(\S+)`).FindStringSubmatch(dl) + if tagMatch != nil { + tagHandles[tagMatch[1]] = tagMatch[2] + } + } + src = src[dIdx+1:] + } + } + + // Strip leading comment lines before ---. + for { + commentRe := regexp.MustCompile(`^[ \t]*#[^\n]*\n`) + if !commentRe.MatchString(src) || !strings.Contains(src, "\n---") { + break + } + src = commentRe.ReplaceAllString(src, "") + } + + // Handle document start marker (---). + docRe := regexp.MustCompile(`^---([ \t]+(.+))?(\r?\n|$)`) + docMatch := docRe.FindStringSubmatch(src) + if docMatch != nil { + prefix := "" + if len(docMatch) > 2 { + prefix = docMatch[2] + } + rest := src[len(docMatch[0]):] + trimmed := strings.TrimLeft(prefix, " \t") + + if len(trimmed) > 0 && (trimmed[0] == '>' || trimmed[0] == '|') { + // Leave --- in place for block scalar context. + } else if prefix != "" && (len(trimmed) == 0 || trimmed[0] != '#') { + structTagRe := regexp.MustCompile(`^!!(seq|map|omap|set|pairs|binary|ordered)\s*$`) + if structTagRe.MatchString(trimmed) { + src = rest + } else { + suffix := "" + if len(docMatch) > 3 { + suffix = docMatch[3] + } + src = prefix + suffix + rest + } + } else { + src = rest + } + } + + // Handle document end marker (... at end of source). 
+ dotRe := regexp.MustCompile(`\n\.\.\.\s*(\r?\n.*)?$`) + if dotRe.MatchString(src) { + loc := dotRe.FindStringIndex(src) + if loc != nil { + src = src[:loc[0]] + } + } + + return src +} + +// handleBlockScalar processes | and > block scalar indicators. +func handleBlockScalar(lex *jsonic.Lex, pnt *jsonic.Point, src, fwd string, ch byte) *jsonic.LexCheckResult { + fold := ch == '>' + chomp := "clip" + explicitIndent := 0 + idx := 1 + + // Parse chomping and indent indicators. + for pi := 0; pi < 2 && idx < len(fwd); pi++ { + if fwd[idx] == '+' { + chomp = "keep" + idx++ + } else if fwd[idx] == '-' { + chomp = "strip" + idx++ + } else if fwd[idx] >= '1' && fwd[idx] <= '9' { + explicitIndent = int(fwd[idx] - '0') + idx++ + } + } + + // Skip trailing spaces and comments. + for idx < len(fwd) && fwd[idx] == ' ' { + idx++ + } + if idx < len(fwd) && fwd[idx] == '#' { + for idx < len(fwd) && fwd[idx] != '\n' && fwd[idx] != '\r' { + idx++ + } + } + + // Must be followed by newline or eof. + if idx < len(fwd) && fwd[idx] != '\n' && fwd[idx] != '\r' { + return nil // Not a block scalar. + } + + // Skip the indicator line's newline. + if idx < len(fwd) && fwd[idx] == '\r' { + idx++ + } + if idx < len(fwd) && fwd[idx] == '\n' { + idx++ + } + + // Determine block indent. + blockIndent := 0 + if explicitIndent == 0 { + // Auto-detect from first content line. + tempIdx := idx + for tempIdx < len(fwd) { + lineSpaces := 0 + for tempIdx+lineSpaces < len(fwd) && fwd[tempIdx+lineSpaces] == ' ' { + lineSpaces++ + } + afterSpaces := tempIdx + lineSpaces + if afterSpaces >= len(fwd) || fwd[afterSpaces] == '\n' || fwd[afterSpaces] == '\r' { + tempIdx = afterSpaces + if tempIdx < len(fwd) && fwd[tempIdx] == '\r' { + tempIdx++ + } + if tempIdx < len(fwd) && fwd[tempIdx] == '\n' { + tempIdx++ + } + continue + } + blockIndent = lineSpaces + break + } + } + + // Determine containing indent. 
+ containingIndent := 0 + isDocStart := false + li := pnt.SI - 1 + for li > 0 && src[li-1] != '\n' && src[li-1] != '\r' { + li-- + } + lineStart := li + for li < pnt.SI && src[li] == ' ' { + containingIndent++ + li++ + } + if lineStart+2 < len(src) && src[lineStart] == '-' && src[lineStart+1] == '-' && src[lineStart+2] == '-' { + isDocStart = true + } + + // Apply explicit indent. + if explicitIndent > 0 { + hasColonOnLine := false + for ci := lineStart + containingIndent; ci < pnt.SI; ci++ { + if src[ci] == ':' && ci+1 < len(src) && (src[ci+1] == ' ' || src[ci+1] == '\t') { + hasColonOnLine = true + break + } + } + keyCol := containingIndent + if hasColonOnLine { + scanI := lineStart + containingIndent + for scanI < pnt.SI && src[scanI] == '-' && + scanI+1 < len(src) && (src[scanI+1] == ' ' || src[scanI+1] == '\t') { + keyCol += 2 + scanI += 2 + for scanI < pnt.SI && src[scanI] == ' ' { + keyCol++ + scanI++ + } + } + blockIndent = keyCol + explicitIndent + } else { + parentIndent := 0 + searchI := lineStart - 1 + if searchI > 0 { + if src[searchI] == '\n' { + searchI-- + } + if searchI > 0 && src[searchI] == '\r' { + searchI-- + } + prevLineEnd := searchI + 1 + for searchI > 0 && src[searchI-1] != '\n' && src[searchI-1] != '\r' { + searchI-- + } + prevLineStart := searchI + for ci := prevLineStart; ci < prevLineEnd; ci++ { + if src[ci] == ':' && (ci+1 >= prevLineEnd || src[ci+1] == ' ' || + src[ci+1] == '\t' || src[ci+1] == '\n' || src[ci+1] == '\r') { + parentIndent = 0 + pi := prevLineStart + for pi < prevLineEnd && src[pi] == ' ' { + parentIndent++ + pi++ + } + break + } + } + } + blockIndent = parentIndent + explicitIndent + containingIndent = parentIndent + } + } + + if blockIndent <= containingIndent && !isDocStart && idx < len(fwd) { + // Content is not indented enough — empty block scalar. 
+ var val string + if chomp == "keep" { + blankCount := 0 + bi := idx + for bi < len(fwd) { + if fwd[bi] == '\n' { + blankCount++ + bi++ + } else if fwd[bi] == '\r' { + bi++ + if bi < len(fwd) && fwd[bi] == '\n' { + bi++ + } + blankCount++ + } else { + break + } + } + if blankCount > 0 { + val = strings.Repeat("\n", blankCount) + } else { + val = "\n" + } + idx = bi + } else { + val = "" + } + tkn := lex.Token("#TX", jsonic.TinTX, val, fwd[:idx]) + pnt.SI += idx + pnt.RI++ + pnt.CI = 0 + return &jsonic.LexCheckResult{Done: true, Token: tkn} + } + + // Collect indented lines. + var lines []string + pos := idx + rows := 1 + lastNewlinePos := idx + for pos < len(fwd) { + lineIndent := 0 + for pos+lineIndent < len(fwd) && fwd[pos+lineIndent] == ' ' { + lineIndent++ + } + afterSpaces := pos + lineIndent + if afterSpaces >= len(fwd) || fwd[afterSpaces] == '\n' || fwd[afterSpaces] == '\r' { + if lineIndent > blockIndent { + lines = append(lines, fwd[pos+blockIndent:afterSpaces]) + } else { + lines = append(lines, "") + } + lastNewlinePos = afterSpaces + pos = afterSpaces + if pos < len(fwd) && fwd[pos] == '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\n' { + pos++ + } + rows++ + continue + } + if lineIndent < blockIndent { + break + } + if lineIndent == 0 && isDocMarker(fwd, pos) { + break + } + lineStartPos := pos + blockIndent + lineEnd := lineStartPos + for lineEnd < len(fwd) && fwd[lineEnd] != '\n' && fwd[lineEnd] != '\r' { + lineEnd++ + } + lines = append(lines, fwd[lineStartPos:lineEnd]) + lastNewlinePos = lineEnd + pos = lineEnd + if pos < len(fwd) && fwd[pos] == '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\n' { + pos++ + } + rows++ + } + + // Build scalar value. + var val string + if fold { + val = foldLines(lines) + } else { + val = strings.Join(lines, "\n") + } + + // Apply chomping. 
+ if len(lines) == 0 { + val = "" + } else if chomp == "strip" { + val = strings.TrimRight(val, "\n") + } else if chomp == "clip" { + val = strings.TrimRight(val, "\n") + "\n" + } else { + // keep + val = val + "\n" + } + + // Don't consume final newline if more content follows. + endPos := pos + endRows := rows + if pos < len(fwd) && pos > lastNewlinePos { + ni := pos + nextLineIndent := 0 + for ni < len(fwd) && fwd[ni] == ' ' { + nextLineIndent++ + ni++ + } + isNextDocMarker := nextLineIndent == 0 && isDocMarker(fwd, ni) + if !isNextDocMarker { + endPos = lastNewlinePos + endRows = rows - 1 + } + } + + tkn := lex.Token("#TX", jsonic.TinTX, val, fwd[:endPos]) + pnt.SI += endPos + pnt.RI += endRows + pnt.CI = 0 + return &jsonic.LexCheckResult{Done: true, Token: tkn} +} + +// foldLines implements YAML folded scalar line joining. +func foldLines(lines []string) string { + var result strings.Builder + prevWasNormal := false + pendingEmptyCount := 0 + + for _, line := range lines { + isMore := len(line) > 0 && (line[0] == ' ' || line[0] == '\t') + isEmpty := line == "" + + if isEmpty { + pendingEmptyCount++ + } else if isMore { + if prevWasNormal && result.Len() > 0 { + result.WriteByte('\n') + } + for ei := 0; ei < pendingEmptyCount; ei++ { + result.WriteByte('\n') + } + pendingEmptyCount = 0 + if result.Len() > 0 { + s := result.String() + if s[len(s)-1] != '\n' { + result.WriteByte('\n') + } + } + result.WriteString(line) + result.WriteByte('\n') + prevWasNormal = false + } else { + if pendingEmptyCount > 0 { + if prevWasNormal && result.Len() > 0 { + result.WriteByte('\n') + for ei := 1; ei < pendingEmptyCount; ei++ { + result.WriteByte('\n') + } + } else { + for ei := 0; ei < pendingEmptyCount; ei++ { + result.WriteByte('\n') + } + } + pendingEmptyCount = 0 + } + if prevWasNormal && result.Len() > 0 { + s := result.String() + if s[len(s)-1] != '\n' { + result.WriteByte(' ') + } + } + result.WriteString(line) + prevWasNormal = true + } + } + for ei := 0; ei < 
pendingEmptyCount; ei++ { + result.WriteByte('\n') + } + return result.String() +} + +// handleTagInTextCheck processes !!type tags encountered in the text check callback. +func handleTagInTextCheck(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, tagHandles map[string]string) *jsonic.LexCheckResult { + tagEnd := 2 + for tagEnd < len(fwd) && fwd[tagEnd] != ' ' && fwd[tagEnd] != '\n' && fwd[tagEnd] != '\r' { + tagEnd++ + } + tag := fwd[2:tagEnd] + if tag == "seq" || tag == "map" { + return nil // Let yamlMatcher handle. + } + valStart := tagEnd + if valStart < len(fwd) && fwd[valStart] == ' ' { + valStart++ + } + rawVal := "" + valEnd := valStart + if valStart < len(fwd) && (fwd[valStart] == '"' || fwd[valStart] == '\'') { + q := fwd[valStart] + valEnd = valStart + 1 + for valEnd < len(fwd) && fwd[valEnd] != q { + if fwd[valEnd] == '\\' && q == '"' { + valEnd++ + } + valEnd++ + } + if valEnd < len(fwd) && fwd[valEnd] == q { + valEnd++ + } + rawVal = fwd[valStart+1 : valEnd-1] + } else { + for valEnd < len(fwd) && fwd[valEnd] != '\n' && fwd[valEnd] != '\r' { + if fwd[valEnd] == ':' && (valEnd+1 >= len(fwd) || fwd[valEnd+1] == ' ' || + fwd[valEnd+1] == '\n' || fwd[valEnd+1] == '\r') { + break + } + if fwd[valEnd] == ' ' && valEnd+1 < len(fwd) && fwd[valEnd+1] == '#' { + break + } + valEnd++ + } + rawVal = trimRight(fwd[valStart:valEnd]) + } + + result := applyTagConversion(tag, rawVal, tagHandles) + tknTin := jsonic.TinTX + switch result.(type) { + case float64: + tknTin = jsonic.TinNR + case bool, nil: + tknTin = jsonic.TinVL + } + if result == nil { + tknTin = jsonic.TinVL + } + + tkn := lex.Token(tinToName(tknTin), tknTin, result, fwd[:valEnd]) + pnt.SI += valEnd + pnt.CI += valEnd + return &jsonic.LexCheckResult{Done: true, Token: tkn} +} + +// handlePlainScalar processes YAML plain scalar values with multiline continuation. +func handlePlainScalar(lex *jsonic.Lex, pnt *jsonic.Point, src, fwd string) *jsonic.LexCheckResult { + // Detect flow context. 
+ inFlowCtx := false + depth := 0 + for fi := 0; fi < pnt.SI; fi++ { + fc := src[fi] + if fc == '{' || fc == '[' { + depth++ + } else if fc == '}' || fc == ']' { + if depth > 0 { + depth-- + } + } else if fc == '"' { + fi++ + for fi < pnt.SI && src[fi] != '"' { + if src[fi] == '\\' { + fi++ + } + fi++ + } + } else if fc == '\'' { + fi++ + for fi < pnt.SI && src[fi] != '\'' { + if fi+1 < pnt.SI && src[fi] == '\'' && src[fi+1] == '\'' { + fi++ + } + fi++ + } + } + } + inFlowCtx = depth > 0 + + // Find current line indent. + lineStartPos := pnt.SI + for lineStartPos > 0 && src[lineStartPos-1] != '\n' && src[lineStartPos-1] != '\r' { + lineStartPos-- + } + currentLineIndent := 0 + ci := lineStartPos + for ci < pnt.SI && src[ci] == ' ' { + currentLineIndent++ + ci++ + } + + // Check if text is preceded by ": " on the same line. + isMapValue := false + ci = pnt.SI - 1 + for ci >= lineStartPos && (src[ci] == ' ' || src[ci] == '\t') { + ci-- + } + if ci >= lineStartPos && src[ci] == ':' { + isMapValue = true + } + + minContinuationIndent := currentLineIndent + if isMapValue { + minContinuationIndent = currentLineIndent + 1 + } + + // Scan first line. + text := "" + i := 0 + totalConsumed := 0 + rows := 0 + + scanLine := func() string { + line := "" + for i < len(fwd) { + c := fwd[i] + if c == '\n' || c == '\r' { + break + } + if c == ':' && (i+1 >= len(fwd) || fwd[i+1] == ' ' || fwd[i+1] == '\t' || + fwd[i+1] == '\n' || fwd[i+1] == '\r') { + break + } + if (c == ' ' || c == '\t') && i+1 < len(fwd) && fwd[i+1] == '#' { + break + } + if inFlowCtx && (c == ']' || c == '}') { + break + } + if c == ',' && inFlowCtx { + break + } + line += string(c) + i++ + } + return trimRight(line) + } + + text = scanLine() + totalConsumed = i + + // Check for continuation lines (multiline plain scalars). 
+ for i < len(fwd) && (fwd[i] == '\n' || fwd[i] == '\r') { + nlPos := i + blankLines := 0 + for i < len(fwd) && (fwd[i] == '\n' || fwd[i] == '\r') { + if fwd[i] == '\r' { + i++ + } + if i < len(fwd) && fwd[i] == '\n' { + i++ + } + li := 0 + for i+li < len(fwd) && (fwd[i+li] == ' ' || fwd[i+li] == '\t') { + li++ + } + if i+li >= len(fwd) || fwd[i+li] == '\n' || fwd[i+li] == '\r' { + blankLines++ + i += li + continue + } + break + } + lineIndent := 0 + for i < len(fwd) && (fwd[i] == ' ' || fwd[i] == '\t') { + lineIndent++ + i++ + } + + isNextDocMarker := lineIndent == 0 && i < len(fwd) && isDocMarker(fwd, i) + isSeqMarker := false + if i < len(fwd) && fwd[i] == '-' && (i+1 >= len(fwd) || fwd[i+1] == ' ' || + fwd[i+1] == '\t' || fwd[i+1] == '\n' || fwd[i+1] == '\r') { + seqIndent := -1 + si := pnt.SI - 1 + for si >= lineStartPos { + if src[si] == '-' && (si+1 < len(src) && (src[si+1] == ' ' || src[si+1] == '\t')) { + seqIndent = si - lineStartPos + break + } + si-- + } + isSeqMarker = (seqIndent >= 0 && lineIndent == seqIndent) || + (seqIndent < 0 && lineIndent <= currentLineIndent) + } + + canContinue := false + if inFlowCtx { + canContinue = i < len(fwd) && fwd[i] != '\n' && fwd[i] != '\r' && + fwd[i] != '#' && fwd[i] != '{' && fwd[i] != '}' && + fwd[i] != '[' && fwd[i] != ']' + } else { + canContinue = lineIndent >= minContinuationIndent && i < len(fwd) && + fwd[i] != '\n' && fwd[i] != '\r' && fwd[i] != '#' && + !isNextDocMarker && !isSeqMarker + } + + if canContinue { + // Check if continuation line is a key-value pair. 
+ isKV := false + peekJ := i + for peekJ < len(fwd) && fwd[peekJ] != '\n' && fwd[peekJ] != '\r' { + if fwd[peekJ] == ':' && (peekJ+1 >= len(fwd) || fwd[peekJ+1] == ' ' || + fwd[peekJ+1] == '\t' || fwd[peekJ+1] == '\n' || fwd[peekJ+1] == '\r') { + isKV = true + break + } + if fwd[peekJ] == '}' || fwd[peekJ] == ']' || fwd[peekJ] == ',' { + break + } + peekJ++ + } + if !isKV || inFlowCtx { + contLine := scanLine() + if len(contLine) > 0 { + if blankLines > 0 { + for b := 0; b < blankLines; b++ { + text += "\n" + } + } else { + text += " " + } + text += contLine + totalConsumed = i + rows++ + continue + } + } + } + i = nlPos + break + } + + text = trimRight(text) + if len(text) == 0 { + return nil + } + + // Check if this is a YAML value keyword. + if val, ok := isYamlValue(text); ok { + tkn := lex.Token("#VL", jsonic.TinVL, val, text) + pnt.SI += len(text) + pnt.CI += len(text) + return &jsonic.LexCheckResult{Done: true, Token: tkn} + } + + // Check if it's a number. + if num, ok := parseYamlNumber(text); ok { + tkn := lex.Token("#NR", jsonic.TinNR, num, text) + pnt.SI += len(text) + pnt.CI += len(text) + return &jsonic.LexCheckResult{Done: true, Token: tkn} + } + + // Plain text. + tkn := lex.Token("#TX", jsonic.TinTX, text, fwd[:totalConsumed]) + pnt.SI += totalConsumed + pnt.RI += rows + pnt.CI += totalConsumed + return &jsonic.LexCheckResult{Done: true, Token: tkn} +} + +// handleTypeTag processes !!type tags (!!str, !!int, !!float, etc.). 
+func handleTypeTag(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, + tagHandles map[string]string, pendingAnchors *[]anchorInfo, + anchors map[string]any, TX, NR, VL, ST jsonic.Tin) *jsonic.Token { + + tagEnd := 2 + for tagEnd < len(fwd) && fwd[tagEnd] != ' ' && fwd[tagEnd] != '\n' && + fwd[tagEnd] != '\r' && fwd[tagEnd] != ',' && + fwd[tagEnd] != '}' && fwd[tagEnd] != ']' && fwd[tagEnd] != ':' { + tagEnd++ + } + tag := fwd[2:tagEnd] + valStart := tagEnd + if valStart < len(fwd) && fwd[valStart] == ' ' { + valStart++ + } + valEnd := valStart + + // Skip anchor before value. + tagAnchorName := "" + if valStart < len(fwd) && fwd[valStart] == '&' { + anchorEnd := valStart + 1 + for anchorEnd < len(fwd) && fwd[anchorEnd] != ' ' && fwd[anchorEnd] != '\n' && fwd[anchorEnd] != '\r' { + anchorEnd++ + } + tagAnchorName = fwd[valStart+1 : anchorEnd] + *pendingAnchors = append(*pendingAnchors, anchorInfo{name: tagAnchorName, inline: true}) + if anchorEnd < len(fwd) && fwd[anchorEnd] == ' ' { + anchorEnd++ + } + valStart = anchorEnd + valEnd = valStart + } + + // Check for quoted value. + if valStart < len(fwd) && (fwd[valStart] == '"' || fwd[valStart] == '\'') { + q := fwd[valStart] + valEnd = valStart + 1 + for valEnd < len(fwd) && fwd[valEnd] != q { + if fwd[valEnd] == '\\' && q == '"' { + valEnd++ + } + valEnd++ + } + if valEnd < len(fwd) && fwd[valEnd] == q { + valEnd++ + } + rawVal := fwd[valStart+1 : valEnd-1] + result := applyTagConversion(tag, rawVal, tagHandles) + if tagAnchorName != "" { + anchors[tagAnchorName] = result + } + tknTin := TX + switch result.(type) { + case float64: + tknTin = NR + case bool: + tknTin = VL + } + if result == nil { + tknTin = VL + } + tkn := lex.Token(tinToName(tknTin), tknTin, result, fwd[:valEnd]) + pnt.SI += valEnd + pnt.CI += valEnd + return tkn + } + + // Tag followed by newline — skip and let next cycle handle. 
+ if valStart < len(fwd) && (fwd[valStart] == '\n' || fwd[valStart] == '\r') && valStart < len(fwd)-1 { + nl := valStart + if nl < len(fwd) && fwd[nl] == '\r' { + nl++ + } + if nl < len(fwd) && fwd[nl] == '\n' { + nl++ + } + pnt.SI += nl + pnt.CI = 0 + pnt.RI++ + return nil // Will re-enter matcher + } + + // Unquoted value. + for valEnd < len(fwd) && fwd[valEnd] != '\n' && fwd[valEnd] != '\r' && + fwd[valEnd] != ',' && fwd[valEnd] != '}' && fwd[valEnd] != ']' { + if fwd[valEnd] == ':' && (valEnd+1 >= len(fwd) || fwd[valEnd+1] == ' ' || + fwd[valEnd+1] == '\n' || fwd[valEnd+1] == '\r') { + break + } + if fwd[valEnd] == ' ' && valEnd+1 < len(fwd) && fwd[valEnd+1] == '#' { + break + } + valEnd++ + } + rawVal := trimRight(fwd[valStart:valEnd]) + result := applyTagConversion(tag, rawVal, tagHandles) + if tagAnchorName != "" { + anchors[tagAnchorName] = result + } + tknTin := TX + switch result.(type) { + case string: + if result.(string) == "" { + tknTin = ST + } else { + tknTin = TX + } + case float64: + tknTin = NR + case bool: + tknTin = VL + } + if result == nil { + tknTin = VL + } + tkn := lex.Token(tinToName(tknTin), tknTin, result, fwd[:valEnd]) + pnt.SI += valEnd + pnt.CI += valEnd + return tkn +} + +// handleExplicitKey processes ? key\n: value patterns. +func handleExplicitKey(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, + pendingExplicitCL *bool, pendingTokens *[]*jsonic.Token, + TX, CL, VL jsonic.Tin) *jsonic.Token { + + start := 1 + if len(fwd) > 1 && (fwd[1] == ' ' || fwd[1] == '\t') { + start = 2 + } + + // Collect key text. + keyEnd := start + for keyEnd < len(fwd) && fwd[keyEnd] != '\n' && fwd[keyEnd] != '\r' { + if fwd[keyEnd] == ' ' && keyEnd+1 < len(fwd) && fwd[keyEnd+1] == '#' { + break + } + keyEnd++ + } + key := trimRight(fwd[start:keyEnd]) + consumed := keyEnd + + // Skip comment at end of key line. 
+ for consumed < len(fwd) && fwd[consumed] != '\n' && fwd[consumed] != '\r' { + consumed++ + } + beforeNewline := consumed + + // Consume newline. + if consumed < len(fwd) && fwd[consumed] == '\r' { + consumed++ + } + if consumed < len(fwd) && fwd[consumed] == '\n' { + consumed++ + } + + // Check for continuation lines. + qIndent := 0 + li := pnt.SI + for li > 0 && lex.Src[li-1] != '\n' && lex.Src[li-1] != '\r' { + li-- + } + for li < pnt.SI && lex.Src[li] == ' ' { + qIndent++ + li++ + } + + // Scan continuation lines (plain scalar multiline key). + for consumed < len(fwd) { + lineIndent := 0 + for consumed+lineIndent < len(fwd) && fwd[consumed+lineIndent] == ' ' { + lineIndent++ + } + afterSpaces := consumed + lineIndent + if afterSpaces < len(fwd) && fwd[afterSpaces] == '#' { + for afterSpaces < len(fwd) && fwd[afterSpaces] != '\n' && fwd[afterSpaces] != '\r' { + afterSpaces++ + } + beforeNewline = afterSpaces + if afterSpaces < len(fwd) && fwd[afterSpaces] == '\r' { + afterSpaces++ + } + if afterSpaces < len(fwd) && fwd[afterSpaces] == '\n' { + afterSpaces++ + } + consumed = afterSpaces + continue + } + if lineIndent > qIndent && afterSpaces < len(fwd) && + fwd[afterSpaces] != ':' && fwd[afterSpaces] != '?' && fwd[afterSpaces] != '-' { + contEnd := afterSpaces + for contEnd < len(fwd) && fwd[contEnd] != '\n' && fwd[contEnd] != '\r' { + if fwd[contEnd] == ' ' && contEnd+1 < len(fwd) && fwd[contEnd+1] == '#' { + break + } + contEnd++ + } + contText := trimRight(fwd[afterSpaces:contEnd]) + if len(contText) > 0 { + key += " " + contText + } + consumed = contEnd + beforeNewline = consumed + if consumed < len(fwd) && fwd[consumed] == '\r' { + consumed++ + } + if consumed < len(fwd) && fwd[consumed] == '\n' { + consumed++ + } + continue + } + break + } + + // Check if next line starts with ":". 
+ hasValue := false + valConsumed := consumed + ci := consumed + for ci < len(fwd) && fwd[ci] == ' ' { + ci++ + } + if ci < len(fwd) && fwd[ci] == ':' && + (ci+1 >= len(fwd) || fwd[ci+1] == ' ' || fwd[ci+1] == '\t' || + fwd[ci+1] == '\n' || fwd[ci+1] == '\r') { + hasValue = true + valConsumed = ci + 1 + if valConsumed < len(fwd) && (fwd[valConsumed] == ' ' || fwd[valConsumed] == '\t') { + valConsumed++ + } + } + + if hasValue { + pnt.SI += valConsumed + pnt.RI++ + pnt.CI = valConsumed - consumed + 1 + *pendingExplicitCL = true + } else { + pnt.SI += beforeNewline + pnt.CI += beforeNewline + clTkn := lex.Token("#CL", CL, 1, ": ") + vlTkn := lex.Token("#VL", VL, nil, "") + *pendingTokens = append(*pendingTokens, clTkn, vlTkn) + } + + tkn := lex.Token("#TX", TX, key, fwd[:keyEnd]) + return tkn +} + +// handleDocMarker processes --- and ... document markers. +func handleDocMarker(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, + IN jsonic.Tin, pendingAnchors *[]anchorInfo, anchors map[string]any, + TX jsonic.Tin) *jsonic.Token { + + pos := 3 + for pos < len(fwd) && fwd[pos] != '\n' && fwd[pos] != '\r' { + pos++ + } + + if fwd[0] == '.' { + // ... terminates document. + pnt.SI += pos + pnt.CI += pos + for pnt.SI < pnt.Len && (lex.Src[pnt.SI] == '\n' || lex.Src[pnt.SI] == '\r') { + if lex.Src[pnt.SI] == '\r' { + pnt.SI++ + } + if pnt.SI < pnt.Len && lex.Src[pnt.SI] == '\n' { + pnt.SI++ + } + pnt.RI++ + } + return lex.Token("#ZZ", jsonic.TinZZ, jsonic.Undefined, "") + } + + // --- handler. + afterDash := 3 + for afterDash < len(fwd) && fwd[afterDash] == ' ' { + afterDash++ + } + dashNextCh := byte(0) + if afterDash < len(fwd) { + dashNextCh = fwd[afterDash] + } + hasInlineValue := dashNextCh != 0 && dashNextCh != '\n' && dashNextCh != '\r' && + dashNextCh != '&' && dashNextCh != '!' && dashNextCh != '#' + + if hasInlineValue { + pnt.SI += afterDash + pnt.CI = afterDash + return nil // Fall through to continue matching. + } + + // Plain --- with nothing after it. 
+ pnt.SI += pos + pnt.RI++ + if pnt.SI < pnt.Len && lex.Src[pnt.SI] == '\r' { + pnt.SI++ + } + if pnt.SI < pnt.Len && lex.Src[pnt.SI] == '\n' { + pnt.SI++ + } + spaces := 0 + for pnt.SI+spaces < pnt.Len && lex.Src[pnt.SI+spaces] == ' ' { + spaces++ + } + pnt.SI += spaces + pnt.CI = spaces + + if pnt.SI >= pnt.Len { + return lex.Token("#ZZ", jsonic.TinZZ, jsonic.Undefined, "") + } + + nextCh := lex.Src[pnt.SI] + if nextCh == '{' || nextCh == '[' || nextCh == '"' || nextCh == '\'' { + return nil // Fall through. + } + if spaces == 0 && nextCh != '-' && nextCh != '.' && nextCh != '?' && + nextCh != '\n' && nextCh != '\r' { + return nil // Fall through. + } + + // Emit #IN with indent level. + return lex.Token("#IN", IN, spaces, fwd[:pos+1+spaces]) +} + +// handleDoubleQuotedString processes YAML double-quoted strings. +func handleDoubleQuotedString(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, ST jsonic.Tin) *jsonic.Token { + i := 1 + val := "" + escapedUpTo := 0 + + for i < len(fwd) && fwd[i] != '"' { + if fwd[i] == '\\' { + i++ + if i >= len(fwd) { + break + } + esc := fwd[i] + switch esc { + case 'n': + val += "\n" + i++ + escapedUpTo = len(val) + case 't': + val += "\t" + i++ + escapedUpTo = len(val) + case 'r': + val += "\r" + i++ + escapedUpTo = len(val) + case '"': + val += "\"" + i++ + escapedUpTo = len(val) + case '\\': + val += "\\" + i++ + escapedUpTo = len(val) + case '/': + val += "/" + i++ + escapedUpTo = len(val) + case 'b': + val += "\b" + i++ + escapedUpTo = len(val) + case 'f': + val += "\f" + i++ + escapedUpTo = len(val) + case 'a': + val += "\x07" + i++ + escapedUpTo = len(val) + case 'e': + val += "\x1b" + i++ + escapedUpTo = len(val) + case 'v': + val += "\v" + i++ + escapedUpTo = len(val) + case '0': + val += "\x00" + i++ + escapedUpTo = len(val) + case ' ': + val += " " + i++ + escapedUpTo = len(val) + case '_': + val += "\u00a0" + i++ + escapedUpTo = len(val) + case 'N': + val += "\u0085" + i++ + escapedUpTo = len(val) + case 'L': + val += 
"\u2028" + i++ + escapedUpTo = len(val) + case 'P': + val += "\u2029" + i++ + escapedUpTo = len(val) + case 'x': + if i+3 <= len(fwd) { + n, err := strconv.ParseInt(fwd[i+1:i+3], 16, 32) + if err == nil { + val += string(rune(n)) + i += 3 + escapedUpTo = len(val) + } else { + val += string(esc) + i++ + } + } else { + val += string(esc) + i++ + } + case 'u': + if i+5 <= len(fwd) { + n, err := strconv.ParseInt(fwd[i+1:i+5], 16, 32) + if err == nil { + val += string(rune(n)) + i += 5 + escapedUpTo = len(val) + } else { + val += string(esc) + i++ + } + } else { + val += string(esc) + i++ + } + case 'U': + if i+9 <= len(fwd) { + n, err := strconv.ParseInt(fwd[i+1:i+9], 16, 32) + if err == nil { + val += string(rune(n)) + i += 9 + escapedUpTo = len(val) + } else { + val += string(esc) + i++ + } + } else { + val += string(esc) + i++ + } + case '\n', '\r': + // Escaped newline: line continuation. + if esc == '\r' && i+1 < len(fwd) && fwd[i+1] == '\n' { + i++ + } + i++ + for i < len(fwd) && (fwd[i] == ' ' || fwd[i] == '\t') { + i++ + } + default: + val += string(esc) + i++ + } + } else if fwd[i] == '\n' || fwd[i] == '\r' { + // Flow scalar line folding. + trimTo := len(val) + for trimTo > escapedUpTo && (val[trimTo-1] == ' ' || val[trimTo-1] == '\t') { + trimTo-- + } + val = val[:trimTo] + emptyLines := 0 + for i < len(fwd) && (fwd[i] == '\n' || fwd[i] == '\r') { + if fwd[i] == '\r' { + i++ + } + if i < len(fwd) && fwd[i] == '\n' { + i++ + } + emptyLines++ + for i < len(fwd) && (fwd[i] == ' ' || fwd[i] == '\t') { + i++ + } + } + if emptyLines > 1 { + for e := 1; e < emptyLines; e++ { + val += "\n" + } + } else { + val += " " + } + } else { + val += string(fwd[i]) + i++ + } + } + if i < len(fwd) && fwd[i] == '"' { + i++ + } + tkn := lex.Token("#ST", ST, val, fwd[:i]) + pnt.SI += i + pnt.CI += i + return tkn +} + +// handleSingleQuotedString processes YAML single-quoted strings. 
+func handleSingleQuotedString(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, ST jsonic.Tin) *jsonic.Token { + i := 1 + val := "" + for i < len(fwd) { + if fwd[i] == '\'' { + if i+1 < len(fwd) && fwd[i+1] == '\'' { + val += "'" + i += 2 + } else { + i++ + break + } + } else if fwd[i] == '\n' || fwd[i] == '\r' { + // Flow scalar line folding. + val = strings.TrimRight(val, " \t") + emptyLines := 0 + for i < len(fwd) && (fwd[i] == '\n' || fwd[i] == '\r') { + if fwd[i] == '\r' { + i++ + } + if i < len(fwd) && fwd[i] == '\n' { + i++ + } + emptyLines++ + for i < len(fwd) && (fwd[i] == ' ' || fwd[i] == '\t') { + i++ + } + } + if emptyLines > 1 { + for e := 1; e < emptyLines; e++ { + val += "\n" + } + } else { + val += " " + } + } else { + val += string(fwd[i]) + i++ + } + } + tkn := lex.Token("#ST", ST, val, fwd[:i]) + pnt.SI += i + pnt.CI += i + return tkn +} + +// handleNumericColon handles plain scalars starting with digits that contain colons. +func handleNumericColon(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, TX jsonic.Tin) *jsonic.Token { + hasEmbeddedColon := false + pi := 1 + for pi < len(fwd) && fwd[pi] != '\n' && fwd[pi] != '\r' { + if fwd[pi] == ':' && pi+1 < len(fwd) && fwd[pi+1] != ' ' && fwd[pi+1] != '\t' && + fwd[pi+1] != '\n' && fwd[pi+1] != '\r' { + hasEmbeddedColon = true + break + } + if fwd[pi] == ' ' || fwd[pi] == '\t' { + break + } + pi++ + } + if !hasEmbeddedColon { + return nil + } + end := 0 + for end < len(fwd) && fwd[end] != ' ' && fwd[end] != '\t' && + fwd[end] != '\n' && fwd[end] != '\r' { + end++ + } + text := fwd[:end] + tkn := lex.Token("#TX", TX, text, text) + pnt.SI += end + pnt.CI += end + return tkn +} + +// applyTagConversion applies !!type tag conversion to a raw value. +func applyTagConversion(tag, rawVal string, tagHandles map[string]string) any { + if _, ok := tagHandles["!!"]; ok { + return rawVal // Custom tag handle — don't apply built-in conversion. 
+ } + switch tag { + case "str": + return rawVal + case "int": + n, err := strconv.ParseInt(rawVal, 10, 64) + if err == nil { + return float64(n) + } + return rawVal + case "float": + n, err := strconv.ParseFloat(rawVal, 64) + if err == nil { + return n + } + return rawVal + case "bool": + return rawVal == "true" || rawVal == "True" || rawVal == "TRUE" + case "null": + return nil + default: + return rawVal + } +} + +// tinToName converts a Tin to its name string. +func tinToName(tin jsonic.Tin) string { + switch tin { + case jsonic.TinTX: + return "#TX" + case jsonic.TinNR: + return "#NR" + case jsonic.TinST: + return "#ST" + case jsonic.TinVL: + return "#VL" + case jsonic.TinOB: + return "#OB" + case jsonic.TinCB: + return "#CB" + case jsonic.TinOS: + return "#OS" + case jsonic.TinCS: + return "#CS" + case jsonic.TinCL: + return "#CL" + case jsonic.TinCA: + return "#CA" + case jsonic.TinZZ: + return "#ZZ" + default: + return "#UK" + } +} diff --git a/go/yaml.go b/go/yaml.go new file mode 100644 index 0000000..74b8b95 --- /dev/null +++ b/go/yaml.go @@ -0,0 +1,204 @@ +package yaml + +import ( + "encoding/json" + "fmt" + "math" + "strconv" + "strings" + + jsonic "github.com/jsonicjs/jsonic/go" +) + +// YamlOptions configures the YAML parser plugin. +// Currently empty — reserved for future extension. +type YamlOptions struct{} + +// Parse parses a YAML string and returns the resulting Go value. +// The returned value can be: +// - map[string]any for mappings +// - []any for sequences +// - float64 for numbers +// - string for strings +// - bool for booleans +// - nil for null or empty input +func Parse(src string) (any, error) { + j := MakeJsonic() + return j.Parse(src) +} + +// MakeJsonic creates a jsonic instance configured for YAML parsing. 
+func MakeJsonic(opts ...YamlOptions) *jsonic.Jsonic { + j := jsonic.Make(jsonic.Options{ + String: &jsonic.StringOptions{ + Chars: "`", // Remove single quote from string chars; we handle YAML strings in yamlMatcher + }, + Lex: &jsonic.LexOptions{ + EmptyResult: nil, + }, + }) + + j.Use(Yaml, nil) + return j +} + +// yamlValueMap maps YAML value keywords to their Go values. +var yamlValueMap = map[string]any{ + "true": true, "True": true, "TRUE": true, + "false": false, "False": false, "FALSE": false, + "null": nil, "Null": nil, "NULL": nil, + "~": nil, + "yes": true, "Yes": true, "YES": true, + "no": false, "No": false, "NO": false, + "on": true, "On": true, "ON": true, + "off": false, "Off": false, "OFF": false, + ".inf": math.Inf(1), ".Inf": math.Inf(1), ".INF": math.Inf(1), + "-.inf": math.Inf(-1), "-.Inf": math.Inf(-1), "-.INF": math.Inf(-1), + ".nan": math.NaN(), ".NaN": math.NaN(), ".NAN": math.NaN(), +} + +// isYamlValue checks if text is a YAML value keyword and returns the value. +func isYamlValue(text string) (any, bool) { + val, ok := yamlValueMap[text] + return val, ok +} + +// parseYamlNumber attempts to parse text as a YAML number. +// Returns the number and true if successful, or 0 and false if not a number. 
func parseYamlNumber(text string) (float64, bool) {
	if text == "" {
		return 0, false
	}

	// Leading sign: strip and recurse so the radix-prefixed branches below
	// also apply to signed values ("-0x1F", "+0b101").
	if len(text) > 1 && text[0] == '-' {
		if num, ok := parseYamlNumber(text[1:]); ok {
			return -num, true
		}
		return 0, false
	}
	if len(text) > 1 && text[0] == '+' {
		return parseYamlNumber(text[1:])
	}

	// Radix-prefixed integers (hex, octal, binary). Checked BEFORE ParseFloat
	// so that malformed prefixed input — including Go hex-float syntax such
	// as "0x1p-2", which strconv.ParseFloat would accept but YAML does not
	// define — is rejected rather than misparsed.
	switch {
	case strings.HasPrefix(text, "0x") || strings.HasPrefix(text, "0X"):
		if n, err := strconv.ParseInt(text[2:], 16, 64); err == nil {
			return float64(n), true
		}
		return 0, false
	case strings.HasPrefix(text, "0o") || strings.HasPrefix(text, "0O"):
		if n, err := strconv.ParseInt(text[2:], 8, 64); err == nil {
			return float64(n), true
		}
		return 0, false
	case strings.HasPrefix(text, "0b") || strings.HasPrefix(text, "0B"):
		if n, err := strconv.ParseInt(text[2:], 2, 64); err == nil {
			return float64(n), true
		}
		return 0, false
	}

	// strconv.ParseFloat accepts "inf", "infinity" and "nan" in any case,
	// but YAML only recognizes the dotted spellings (.inf/.nan), which are
	// resolved via yamlValueMap before numeric parsing is attempted. Reject
	// the bare spellings here so they fall through as plain strings.
	switch strings.ToLower(text) {
	case "inf", "infinity", "nan":
		return 0, false
	}

	num, err := strconv.ParseFloat(text, 64)
	if err != nil {
		return 0, false
	}
	return num, true
}

// deepCopy returns a structurally independent copy of v for the container
// types produced by parsing (map[string]any, []any); scalars are returned
// as-is since they are immutable. The copy is made via a JSON round-trip;
// on any marshal/unmarshal failure the original value is returned unchanged
// (best-effort, matching the caller's alias-resolution semantics).
func deepCopy(v any) any {
	switch v.(type) {
	case map[string]any, []any:
		data, err := json.Marshal(v)
		if err != nil {
			return v
		}
		var out any
		if err := json.Unmarshal(data, &out); err != nil {
			return v
		}
		// json.Unmarshal into any rebuilds objects as map[string]any and
		// arrays as []any, preserving the original container types.
		return out
	default:
		return v
	}
}

// extractKey extracts a key value from a token, resolving aliases.
+func extractKey(o0 *jsonic.Token, anchors map[string]any) any { + if o0.Tin == jsonic.TinVL { + if m, ok := o0.Val.(map[string]any); ok { + if alias, ok := m["__yamlAlias"].(string); ok { + if val, exists := anchors[alias]; exists { + return val + } + return "*" + alias + } + } + } + if o0.Tin == jsonic.TinST || o0.Tin == jsonic.TinTX { + if s, ok := o0.Val.(string); ok { + return s + } + } + return o0.Src +} + +// anchorInfo holds anchor metadata during parsing. +type anchorInfo struct { + name string + inline bool +} + +// isDocMarker checks if the string at position i starts with --- or ... +// followed by a space, tab, newline, or end of string. +func isDocMarker(s string, i int) bool { + if i+3 > len(s) { + return false + } + marker := s[i : i+3] + if marker != "---" && marker != "..." { + return false + } + if i+3 >= len(s) { + return true + } + next := s[i+3] + return next == '\n' || next == '\r' || next == ' ' || next == '\t' +} + +// trimRight removes trailing whitespace from a string. +func trimRight(s string) string { + return strings.TrimRight(s, " \t") +} + +// formatKey converts a value to a string suitable for use as a map key. +func formatKey(v any) string { + switch k := v.(type) { + case string: + return k + case float64: + if k == float64(int64(k)) { + return fmt.Sprintf("%d", int64(k)) + } + return fmt.Sprintf("%g", k) + case bool: + if k { + return "true" + } + return "false" + case nil: + return "null" + default: + return fmt.Sprintf("%v", v) + } +} diff --git a/go/yaml_test.go b/go/yaml_test.go new file mode 100644 index 0000000..ca17b03 --- /dev/null +++ b/go/yaml_test.go @@ -0,0 +1,630 @@ +package yaml + +import ( + "encoding/json" + "math" + "reflect" + "testing" +) + +// y is a helper that parses YAML and returns the result. 
+func y(t *testing.T, src string) any { + t.Helper() + result, err := Parse(src) + if err != nil { + t.Fatalf("Parse error: %v\nInput: %q", err, src) + } + return result +} + +// jsonNormalize round-trips through JSON to normalize types (e.g., int→float64). +func jsonNormalize(v any) any { + data, err := json.Marshal(v) + if err != nil { + return v + } + var out any + if err := json.Unmarshal(data, &out); err != nil { + return v + } + return out +} + +func expectEqual(t *testing.T, got, want any) { + t.Helper() + gotN := jsonNormalize(got) + wantN := jsonNormalize(want) + if !reflect.DeepEqual(gotN, wantN) { + gotJSON, _ := json.MarshalIndent(gotN, "", " ") + wantJSON, _ := json.MarshalIndent(wantN, "", " ") + t.Errorf("Mismatch:\nGot: %s\nWant: %s", gotJSON, wantJSON) + } +} + +// ===== BLOCK MAPPINGS ===== + +func TestSinglePair(t *testing.T) { + expectEqual(t, y(t, "a: 1"), map[string]any{"a": float64(1)}) +} + +func TestMultiplePairs(t *testing.T) { + expectEqual(t, y(t, "a: 1\nb: 2\nc: 3"), map[string]any{"a": float64(1), "b": float64(2), "c": float64(3)}) +} + +func TestNestedMap(t *testing.T) { + expectEqual(t, y(t, "a:\n b: 1\n c: 2"), map[string]any{"a": map[string]any{"b": float64(1), "c": float64(2)}}) +} + +func TestDeeplyNestedMap(t *testing.T) { + expectEqual(t, y(t, "a:\n b:\n c:\n d: 1"), + map[string]any{"a": map[string]any{"b": map[string]any{"c": map[string]any{"d": float64(1)}}}}) +} + +func TestSiblingNestedMaps(t *testing.T) { + expectEqual(t, y(t, "a:\n x: 1\nb:\n y: 2"), + map[string]any{"a": map[string]any{"x": float64(1)}, "b": map[string]any{"y": float64(2)}}) +} + +func TestEmptyValueFollowedBySibling(t *testing.T) { + expectEqual(t, y(t, "a:\nb: 1"), map[string]any{"a": nil, "b": float64(1)}) +} + +func TestColonAtEndOfLine(t *testing.T) { + expectEqual(t, y(t, "a:\n b: 1"), map[string]any{"a": map[string]any{"b": float64(1)}}) +} + +func TestTrailingNewline(t *testing.T) { + expectEqual(t, y(t, "a: 1\n"), map[string]any{"a": 
float64(1)}) +} + +// ===== BLOCK SEQUENCES ===== + +func TestSimpleList(t *testing.T) { + expectEqual(t, y(t, "- a\n- b\n- c"), []any{"a", "b", "c"}) +} + +func TestSingleElement(t *testing.T) { + expectEqual(t, y(t, "- a"), []any{"a"}) +} + +func TestNestedListInMap(t *testing.T) { + expectEqual(t, y(t, "items:\n - a\n - b"), map[string]any{"items": []any{"a", "b"}}) +} + +func TestListOfNumbers(t *testing.T) { + expectEqual(t, y(t, "- 1\n- 2\n- 3"), []any{float64(1), float64(2), float64(3)}) +} + +func TestListOfMaps(t *testing.T) { + expectEqual(t, y(t, "- name: alice\n- name: bob"), + []any{map[string]any{"name": "alice"}, map[string]any{"name": "bob"}}) +} + +func TestNestedListOfMapsMultikey(t *testing.T) { + expectEqual(t, y(t, "items:\n - name: alice\n age: 30\n - name: bob\n age: 25"), + map[string]any{"items": []any{ + map[string]any{"name": "alice", "age": float64(30)}, + map[string]any{"name": "bob", "age": float64(25)}, + }}) +} + +func TestDeeplyNestedList(t *testing.T) { + expectEqual(t, y(t, "a:\n b:\n - x\n - y"), + map[string]any{"a": map[string]any{"b": []any{"x", "y"}}}) +} + +func TestMixedMapThenList(t *testing.T) { + expectEqual(t, y(t, "a: 1\nb:\n - x\n - y\nc: 3"), + map[string]any{"a": float64(1), "b": []any{"x", "y"}, "c": float64(3)}) +} + +// ===== SCALAR TYPES ===== + +func TestInteger(t *testing.T) { + expectEqual(t, y(t, "a: 42"), map[string]any{"a": float64(42)}) +} + +func TestNegativeInteger(t *testing.T) { + expectEqual(t, y(t, "a: -7"), map[string]any{"a": float64(-7)}) +} + +func TestFloat(t *testing.T) { + expectEqual(t, y(t, "a: 3.14"), map[string]any{"a": float64(3.14)}) +} + +func TestZero(t *testing.T) { + expectEqual(t, y(t, "a: 0"), map[string]any{"a": float64(0)}) +} + +func TestBooleanTrue(t *testing.T) { + expectEqual(t, y(t, "a: true"), map[string]any{"a": true}) +} + +func TestBooleanFalse(t *testing.T) { + expectEqual(t, y(t, "a: false"), map[string]any{"a": false}) +} + +func TestNullKeyword(t *testing.T) { + 
expectEqual(t, y(t, "a: null"), map[string]any{"a": nil}) +} + +func TestTildeNull(t *testing.T) { + expectEqual(t, y(t, "a: ~"), map[string]any{"a": nil}) +} + +func TestEmptyValueNull(t *testing.T) { + expectEqual(t, y(t, "a:"), map[string]any{"a": nil}) +} + +func TestPlainString(t *testing.T) { + expectEqual(t, y(t, "a: hello world"), map[string]any{"a": "hello world"}) +} + +func TestOctalNumber(t *testing.T) { + expectEqual(t, y(t, "a: 0o77"), map[string]any{"a": float64(63)}) +} + +func TestHexNumber(t *testing.T) { + expectEqual(t, y(t, "a: 0xFF"), map[string]any{"a": float64(255)}) +} + +func TestPositiveInfinity(t *testing.T) { + result := y(t, "a: .inf") + m, ok := result.(map[string]any) + if !ok { + t.Fatalf("expected map, got %T", result) + } + v, ok := m["a"].(float64) + if !ok || !math.IsInf(v, 1) { + t.Errorf("expected +Inf, got %v", m["a"]) + } +} + +func TestNegativeInfinity(t *testing.T) { + result := y(t, "a: -.inf") + m, ok := result.(map[string]any) + if !ok { + t.Fatalf("expected map, got %T", result) + } + v, ok := m["a"].(float64) + if !ok || !math.IsInf(v, -1) { + t.Errorf("expected -Inf, got %v", m["a"]) + } +} + +func TestNaN(t *testing.T) { + result := y(t, "a: .nan") + m, ok := result.(map[string]any) + if !ok { + t.Fatalf("expected map, got %T", result) + } + v, ok := m["a"].(float64) + if !ok || !math.IsNaN(v) { + t.Errorf("expected NaN, got %v", m["a"]) + } +} + +func TestYesBoolean(t *testing.T) { + expectEqual(t, y(t, "a: yes"), map[string]any{"a": true}) +} + +func TestNoBoolean(t *testing.T) { + expectEqual(t, y(t, "a: no"), map[string]any{"a": false}) +} + +func TestOnBoolean(t *testing.T) { + expectEqual(t, y(t, "a: on"), map[string]any{"a": true}) +} + +func TestOffBoolean(t *testing.T) { + expectEqual(t, y(t, "a: off"), map[string]any{"a": false}) +} + +// ===== QUOTED STRINGS ===== + +func TestDoubleQuoted(t *testing.T) { + expectEqual(t, y(t, `a: "hello"`), map[string]any{"a": "hello"}) +} + +func TestSingleQuoted(t 
*testing.T) { + expectEqual(t, y(t, `a: 'hello'`), map[string]any{"a": "hello"}) +} + +func TestDoubleQuotedWithColon(t *testing.T) { + expectEqual(t, y(t, `a: "key: value"`), map[string]any{"a": "key: value"}) +} + +func TestSingleQuotedWithColon(t *testing.T) { + expectEqual(t, y(t, `a: 'key: value'`), map[string]any{"a": "key: value"}) +} + +func TestDoubleQuotedEmpty(t *testing.T) { + expectEqual(t, y(t, `a: ""`), map[string]any{"a": ""}) +} + +func TestSingleQuotedEmpty(t *testing.T) { + expectEqual(t, y(t, `a: ''`), map[string]any{"a": ""}) +} + +func TestQuotedNumberStaysString(t *testing.T) { + expectEqual(t, y(t, `a: "42"`), map[string]any{"a": "42"}) +} + +func TestQuotedBooleanStaysString(t *testing.T) { + expectEqual(t, y(t, `a: "true"`), map[string]any{"a": "true"}) +} + +// ===== BLOCK SCALARS ===== + +func TestLiteralBlock(t *testing.T) { + expectEqual(t, y(t, "a: |\n line1\n line2\n line3"), + map[string]any{"a": "line1\nline2\nline3\n"}) +} + +func TestLiteralBlockStrip(t *testing.T) { + expectEqual(t, y(t, "a: |-\n line1\n line2"), + map[string]any{"a": "line1\nline2"}) +} + +func TestLiteralBlockKeep(t *testing.T) { + expectEqual(t, y(t, "a: |+\n line1\n line2\n\n"), + map[string]any{"a": "line1\nline2\n\n"}) +} + +func TestFoldedBlock(t *testing.T) { + expectEqual(t, y(t, "a: >\n line1\n line2\n line3"), + map[string]any{"a": "line1 line2 line3\n"}) +} + +func TestFoldedBlockStrip(t *testing.T) { + expectEqual(t, y(t, "a: >-\n line1\n line2"), + map[string]any{"a": "line1 line2"}) +} + +func TestFoldedBlockKeep(t *testing.T) { + expectEqual(t, y(t, "a: >+\n line1\n line2\n\n"), + map[string]any{"a": "line1 line2\n\n"}) +} + +func TestLiteralBlockPreservesInnerIndent(t *testing.T) { + expectEqual(t, y(t, "a: |\n line1\n indented\n line3"), + map[string]any{"a": "line1\n indented\nline3\n"}) +} + +// ===== FLOW COLLECTIONS ===== + +func TestFlowSequence(t *testing.T) { + expectEqual(t, y(t, "a: [1, 2, 3]"), map[string]any{"a": []any{float64(1), 
float64(2), float64(3)}}) +} + +func TestFlowMapping(t *testing.T) { + expectEqual(t, y(t, "a: {x: 1, y: 2}"), map[string]any{"a": map[string]any{"x": float64(1), "y": float64(2)}}) +} + +func TestNestedFlowInBlock(t *testing.T) { + expectEqual(t, y(t, "a: [1, [2, 3]]"), + map[string]any{"a": []any{float64(1), []any{float64(2), float64(3)}}}) +} + +func TestEmptyFlowSequence(t *testing.T) { + expectEqual(t, y(t, "a: []"), map[string]any{"a": []any{}}) +} + +func TestEmptyFlowMapping(t *testing.T) { + expectEqual(t, y(t, "a: {}"), map[string]any{"a": map[string]any{}}) +} + +func TestFlowAtTopLevelSeq(t *testing.T) { + expectEqual(t, y(t, "[1, 2, 3]"), []any{float64(1), float64(2), float64(3)}) +} + +func TestFlowAtTopLevelMap(t *testing.T) { + expectEqual(t, y(t, "{a: 1, b: 2}"), map[string]any{"a": float64(1), "b": float64(2)}) +} + +// ===== COMMENTS ===== + +func TestLineComment(t *testing.T) { + expectEqual(t, y(t, "a: 1 # comment\nb: 2"), + map[string]any{"a": float64(1), "b": float64(2)}) +} + +func TestFullLineComment(t *testing.T) { + expectEqual(t, y(t, "# this is a comment\na: 1"), map[string]any{"a": float64(1)}) +} + +func TestCommentAfterKey(t *testing.T) { + expectEqual(t, y(t, "a: # comment\n b: 1"), + map[string]any{"a": map[string]any{"b": float64(1)}}) +} + +func TestMultipleComments(t *testing.T) { + expectEqual(t, y(t, "# first\na: 1\n# second\nb: 2"), + map[string]any{"a": float64(1), "b": float64(2)}) +} + +func TestCommentInList(t *testing.T) { + expectEqual(t, y(t, "- a # comment\n- b"), []any{"a", "b"}) +} + +// ===== ANCHORS AND ALIASES ===== + +func TestSimpleAnchorAlias(t *testing.T) { + expectEqual(t, y(t, "a: &ref hello\nb: *ref"), + map[string]any{"a": "hello", "b": "hello"}) +} + +func TestAnchorOnMap(t *testing.T) { + expectEqual(t, y(t, "defaults: &defaults\n x: 1\n y: 2\noverride:\n <<: *defaults\n y: 3"), + map[string]any{ + "defaults": map[string]any{"x": float64(1), "y": float64(2)}, + "override": map[string]any{"x": 
float64(1), "y": float64(3)}, + }) +} + +func TestAnchorOnSequence(t *testing.T) { + expectEqual(t, y(t, "a: &items\n - 1\n - 2\nb: *items"), + map[string]any{ + "a": []any{float64(1), float64(2)}, + "b": []any{float64(1), float64(2)}, + }) +} + +func TestMultipleAliases(t *testing.T) { + expectEqual(t, y(t, "a: &x 10\nb: &y 20\nc: *x\nd: *y"), + map[string]any{"a": float64(10), "b": float64(20), "c": float64(10), "d": float64(20)}) +} + +// ===== MERGE KEY ===== + +func TestSimpleMerge(t *testing.T) { + expectEqual(t, y(t, "defaults: &d\n a: 1\n b: 2\nresult:\n <<: *d\n c: 3"), + map[string]any{ + "defaults": map[string]any{"a": float64(1), "b": float64(2)}, + "result": map[string]any{"a": float64(1), "b": float64(2), "c": float64(3)}, + }) +} + +func TestMergeOverride(t *testing.T) { + expectEqual(t, y(t, "base: &b\n x: 1\n y: 2\nchild:\n <<: *b\n y: 99"), + map[string]any{ + "base": map[string]any{"x": float64(1), "y": float64(2)}, + "child": map[string]any{"x": float64(1), "y": float64(99)}, + }) +} + +// ===== MULTI-DOCUMENT ===== + +func TestDocumentStartMarker(t *testing.T) { + expectEqual(t, y(t, "---\na: 1"), map[string]any{"a": float64(1)}) +} + +func TestDocumentEndMarker(t *testing.T) { + expectEqual(t, y(t, "a: 1\n..."), map[string]any{"a": float64(1)}) +} + +// ===== TAGS ===== + +func TestExplicitStringTag(t *testing.T) { + expectEqual(t, y(t, "a: !!str 42"), map[string]any{"a": "42"}) +} + +func TestExplicitIntTag(t *testing.T) { + expectEqual(t, y(t, `a: !!int "42"`), map[string]any{"a": float64(42)}) +} + +func TestExplicitFloatTag(t *testing.T) { + expectEqual(t, y(t, `a: !!float "3.14"`), map[string]any{"a": float64(3.14)}) +} + +func TestExplicitBoolTag(t *testing.T) { + expectEqual(t, y(t, `a: !!bool "true"`), map[string]any{"a": true}) +} + +func TestExplicitNullTag(t *testing.T) { + expectEqual(t, y(t, `a: !!null ""`), map[string]any{"a": nil}) +} + +// ===== COMPLEX KEYS ===== + +func TestExplicitKey(t *testing.T) { + expectEqual(t, y(t, "? 
a\n: 1"), map[string]any{"a": float64(1)}) +} + +func TestNumericKey(t *testing.T) { + expectEqual(t, y(t, "1: one\n2: two"), map[string]any{"1": "one", "2": "two"}) +} + +// ===== DIRECTIVES ===== + +func TestYamlDirective(t *testing.T) { + // Should not error - directive stripped + result, err := Parse("%YAML 1.2\n---\na: 1") + if err != nil { + t.Fatalf("Parse error: %v", err) + } + expectEqual(t, result, map[string]any{"a": float64(1)}) +} + +func TestTagDirective(t *testing.T) { + result, err := Parse("%TAG ! tag:example.com,2000:\n---\na: 1") + if err != nil { + t.Fatalf("Parse error: %v", err) + } + expectEqual(t, result, map[string]any{"a": float64(1)}) +} + +// ===== INDENTATION ===== + +func TestTwoSpaceIndent(t *testing.T) { + expectEqual(t, y(t, "a:\n b: 1"), map[string]any{"a": map[string]any{"b": float64(1)}}) +} + +func TestFourSpaceIndent(t *testing.T) { + expectEqual(t, y(t, "a:\n b: 1"), map[string]any{"a": map[string]any{"b": float64(1)}}) +} + +func TestMixedIndentLevels(t *testing.T) { + expectEqual(t, y(t, "a:\n b:\n c: 1"), + map[string]any{"a": map[string]any{"b": map[string]any{"c": float64(1)}}}) +} + +func TestReturnToOuterIndent(t *testing.T) { + expectEqual(t, y(t, "a:\n b: 1\n c: 2\nd: 3"), + map[string]any{"a": map[string]any{"b": float64(1), "c": float64(2)}, "d": float64(3)}) +} + +func TestMultipleIndentReturns(t *testing.T) { + expectEqual(t, y(t, "a:\n b:\n c: 1\n d: 2\ne: 3"), + map[string]any{"a": map[string]any{"b": map[string]any{"c": float64(1)}, "d": float64(2)}, "e": float64(3)}) +} + +func TestListIndentUnderMap(t *testing.T) { + expectEqual(t, y(t, "a:\n - 1\n - 2\nb: 3"), + map[string]any{"a": []any{float64(1), float64(2)}, "b": float64(3)}) +} + +// ===== MULTILINE PLAIN SCALARS ===== + +func TestContinuationLine(t *testing.T) { + expectEqual(t, y(t, "a: this is\n a long string"), + map[string]any{"a": "this is a long string"}) +} + +func TestMultipleContinuationLines(t *testing.T) { + expectEqual(t, y(t, "a: line 
one\n line two\n line three"), + map[string]any{"a": "line one line two line three"}) +} + +// ===== WINDOWS LINE ENDINGS ===== + +func TestCRLF(t *testing.T) { + expectEqual(t, y(t, "a: 1\r\nb: 2"), + map[string]any{"a": float64(1), "b": float64(2)}) +} + +func TestCRLFNested(t *testing.T) { + expectEqual(t, y(t, "a:\r\n b: 1\r\n c: 2"), + map[string]any{"a": map[string]any{"b": float64(1), "c": float64(2)}}) +} + +func TestCRLFList(t *testing.T) { + expectEqual(t, y(t, "- a\r\n- b"), []any{"a", "b"}) +} + +// ===== SPECIAL CHARS IN VALUES ===== + +func TestValueWithHashNotComment(t *testing.T) { + expectEqual(t, y(t, "a: foo#bar"), map[string]any{"a": "foo#bar"}) +} + +func TestKeyWithSpaces(t *testing.T) { + expectEqual(t, y(t, "a long key: value"), map[string]any{"a long key": "value"}) +} + +// ===== SEQUENCE OF MAPPINGS ===== + +func TestCompactNotation(t *testing.T) { + expectEqual(t, y(t, "- name: alice\n age: 30\n- name: bob\n age: 25"), + []any{ + map[string]any{"name": "alice", "age": float64(30)}, + map[string]any{"name": "bob", "age": float64(25)}, + }) +} + +func TestSingleKeyPerElement(t *testing.T) { + expectEqual(t, y(t, "- a: 1\n- b: 2\n- c: 3"), + []any{map[string]any{"a": float64(1)}, map[string]any{"b": float64(2)}, map[string]any{"c": float64(3)}}) +} + +func TestNestedInMap(t *testing.T) { + expectEqual(t, y(t, "people:\n - name: alice\n - name: bob"), + map[string]any{"people": []any{map[string]any{"name": "alice"}, map[string]any{"name": "bob"}}}) +} + +// ===== REAL-WORLD YAML PATTERNS ===== + +func TestDockerComposeLike(t *testing.T) { + expectEqual(t, y(t, "version: 3\nservices:\n web:\n image: nginx\n ports:\n - 80\n - 443"), + map[string]any{ + "version": float64(3), + "services": map[string]any{ + "web": map[string]any{ + "image": "nginx", + "ports": []any{float64(80), float64(443)}, + }, + }, + }) +} + +func TestGithubActionsLike(t *testing.T) { + expectEqual(t, y(t, "name: build\non:\n push:\n branches:\n - main\njobs:\n test:\n 
runs-on: ubuntu"), + map[string]any{ + "name": "build", + "on": map[string]any{"push": map[string]any{"branches": []any{"main"}}}, + "jobs": map[string]any{"test": map[string]any{"runs-on": "ubuntu"}}, + }) +} + +func TestKubernetesLike(t *testing.T) { + expectEqual(t, y(t, "apiVersion: v1\nkind: Pod\nmetadata:\n name: myapp\n labels:\n app: myapp\nspec:\n containers:\n - name: web\n image: nginx"), + map[string]any{ + "apiVersion": "v1", + "kind": "Pod", + "metadata": map[string]any{"name": "myapp", "labels": map[string]any{"app": "myapp"}}, + "spec": map[string]any{"containers": []any{map[string]any{"name": "web", "image": "nginx"}}}, + }) +} + +func TestAnsibleLike(t *testing.T) { + expectEqual(t, y(t, "- name: install packages\n become: true\n- name: start service\n become: false"), + []any{ + map[string]any{"name": "install packages", "become": true}, + map[string]any{"name": "start service", "become": false}, + }) +} + +func TestConfigFileLike(t *testing.T) { + expectEqual(t, y(t, "database:\n host: localhost\n port: 5432\n name: mydb\ncache:\n enabled: true\n ttl: 3600"), + map[string]any{ + "database": map[string]any{"host": "localhost", "port": float64(5432), "name": "mydb"}, + "cache": map[string]any{"enabled": true, "ttl": float64(3600)}, + }) +} + +// ===== EMPTY INPUT ===== + +func TestEmptyInput(t *testing.T) { + result, err := Parse("") + if err != nil { + t.Fatalf("Parse error: %v", err) + } + if result != nil { + t.Errorf("expected nil, got %v", result) + } +} + +func TestWhitespaceOnly(t *testing.T) { + result, err := Parse(" \n \n ") + if err != nil { + t.Fatalf("Parse error: %v", err) + } + if result != nil { + t.Errorf("expected nil, got %v", result) + } +} + +// ===== HAPPY PATH ===== + +func TestHappy(t *testing.T) { + expectEqual(t, y(t, "a: 1\nb: 2\nc:\n d: 3\n e: 4\n f:\n - g\n - h\n"), + map[string]any{ + "a": float64(1), + "b": float64(2), + "c": map[string]any{ + "d": float64(3), + "e": float64(4), + "f": []any{"g", "h"}, + }, + }) +} 
From 7ab8c86370f9e2ca911a95c1c9a013aa7206f095 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 16:30:00 +0000 Subject: [PATCH 09/11] Add GitHub Actions workflow for Node.js and Go builds Runs build and test on ubuntu, windows, and macos for both the TypeScript (Node 24) and Go (1.24) implementations. https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- .github/workflows/build.yml | 49 +++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..1742e9c --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,49 @@ +name: build + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + node: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + node-version: [24.x] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + - run: npm i + - run: npm run build --if-present + - run: npm test + + go: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + go-version: ['1.24'] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + - name: Use Go ${{ matrix.go-version }} + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go-version }} + - name: Build + working-directory: go + run: go build ./... + - name: Test + working-directory: go + run: go test -v ./... From 69a1d5882c6ae5603b211caa35efc24d70d53ae8 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 16:55:03 +0000 Subject: [PATCH 10/11] Fix LE5A test failure on Windows by normalizing CRLF line endings On Windows, readFileSync returns \r\n line endings which cause the YAML parser to fail on certain inputs (e.g. LE5A with trailing empty lines after !!str). 
Normalize to \n when reading in.yaml test files. https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- test/yaml-test-suite.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/yaml-test-suite.test.ts b/test/yaml-test-suite.test.ts index bcfcd8a..1ebc627 100644 --- a/test/yaml-test-suite.test.ts +++ b/test/yaml-test-suite.test.ts @@ -215,7 +215,7 @@ describe('yaml-test-suite', () => { const skipReason = SKIP[tc.id] test(`${tc.id}: ${tc.name}`, { skip: skipReason || undefined }, () => { - const inYaml = readFileSync(join(tc.dir, 'in.yaml'), 'utf8') + const inYaml = readFileSync(join(tc.dir, 'in.yaml'), 'utf8').replace(/\r\n/g, '\n') const inJsonRaw = readFileSync(join(tc.dir, 'in.json'), 'utf8') const { value: expected, multiDoc } = parseExpectedJson(inJsonRaw) @@ -247,7 +247,7 @@ describe('yaml-test-suite', () => { const skipReason = SKIP[tc.id] test(`${tc.id}: ${tc.name}`, { skip: skipReason || undefined }, () => { - const inYaml = readFileSync(join(tc.dir, 'in.yaml'), 'utf8') + const inYaml = readFileSync(join(tc.dir, 'in.yaml'), 'utf8').replace(/\r\n/g, '\n') const j = Jsonic.make().use(Yaml) let threw = false From ad14455b3694c56326cf5a5994ea7c5f02056d37 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 12 Mar 2026 16:56:07 +0000 Subject: [PATCH 11/11] Add csv/ to .gitignore https://claude.ai/code/session_01H3rUS9E1u5eXZrzyYiBPB3 --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 3bc6cd2..eceeb93 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,6 @@ test_preprocess3.js test_preprocess_debug.js test_specific.js update_skip.js + +# Unrelated local directories +csv/