diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..1742e9c --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,49 @@ +name: build + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + node: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + node-version: [24.x] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + - run: npm i + - run: npm run build --if-present + - run: npm test + + go: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + go-version: ['1.24'] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + - name: Use Go ${{ matrix.go-version }} + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go-version }} + - name: Build + working-directory: go + run: go build ./... + - name: Test + working-directory: go + run: go test -v ./... 
diff --git a/.gitignore b/.gitignore index 3bc6cd2..eceeb93 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,6 @@ test_preprocess3.js test_preprocess_debug.js test_specific.js update_skip.js + +# Unrelated local directories +csv/ diff --git a/go/go.mod b/go/go.mod new file mode 100644 index 0000000..375e3d8 --- /dev/null +++ b/go/go.mod @@ -0,0 +1,5 @@ +module github.com/jsonicjs/yaml/go + +go 1.24.7 + +require github.com/jsonicjs/jsonic/go v0.1.4 diff --git a/go/go.sum b/go/go.sum new file mode 100644 index 0000000..dc99d17 --- /dev/null +++ b/go/go.sum @@ -0,0 +1,2 @@ +github.com/jsonicjs/jsonic/go v0.1.4 h1:V1KEzmg/jIwk25+JYj8ig1+B7190rHmH8WqZbT7XlgA= +github.com/jsonicjs/jsonic/go v0.1.4/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg= diff --git a/go/grammar.go b/go/grammar.go new file mode 100644 index 0000000..50f0c6e --- /dev/null +++ b/go/grammar.go @@ -0,0 +1,561 @@ +package yaml + +import ( + jsonic "github.com/jsonicjs/jsonic/go" +) + +// configureGrammarRules sets up YAML-specific grammar rules. +func configureGrammarRules(j *jsonic.Jsonic, IN, EL jsonic.Tin, KEY []jsonic.Tin, + CL, ZZ, CA, CS, CB, TX, ST, VL, NR jsonic.Tin, + anchors map[string]any, pendingAnchors *[]anchorInfo) { + + // ===== val rule ===== + j.Rule("val", func(rs *jsonic.RuleSpec) { + rs.PrependOpen( + // Indent followed by content: push indent rule. 
+ &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + parentIn, hasParentIn := r.K["yamlIn"] + listIn, hasListIn := r.K["yamlListIn"] + if hasListIn && listIn != nil { + if listInVal, ok := toInt(listIn); ok { + if t0Val, ok := toInt(ctx.T0.Val); ok { + if t0Val <= listInVal { + return false + } + } + } + } + if !hasParentIn || parentIn == nil { + return true + } + if parentInVal, ok := toInt(parentIn); ok { + if t0Val, ok := toInt(ctx.T0.Val); ok { + return t0Val > parentInVal + } + } + return true + }, + P: "indent", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if v, ok := toInt(r.O0.Val); ok { + r.N["in"] = v + } + }, + }, + + // Same indent followed by element marker: list value at map level. + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}, {EL}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + parentIn, hasParentIn := r.K["yamlIn"] + if !hasParentIn || parentIn == nil { + return false + } + if parentInVal, ok := toInt(parentIn); ok { + if t0Val, ok := toInt(ctx.T0.Val); ok { + return t0Val == parentInVal + } + } + return false + }, + P: "yamlBlockList", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if v, ok := toInt(r.O0.Val); ok { + r.N["in"] = v + } + }, + }, + + // End of input means empty value. + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{ZZ}}, + B: 1, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = nil + }, + }, + + // Same or lesser indent: empty value — backtrack. + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + B: 1, + U: map[string]any{"yamlEmpty": true}, + }, + + // This value is a list. + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{EL}}, + P: "yamlBlockList", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.N["in"] = r.O0.CI - 1 + }, + }, + ) + + // After open: claim pending anchors. 
+ rs.AddAO(func(r *jsonic.Rule, ctx *jsonic.Context) { + if len(*pendingAnchors) > 0 { + anchorsCopy := make([]anchorInfo, len(*pendingAnchors)) + copy(anchorsCopy, *pendingAnchors) + r.U["yamlAnchors"] = anchorsCopy + r.U["yamlAnchorOpenNode"] = r.Node + *pendingAnchors = (*pendingAnchors)[:0] + } + }) + + // Before close: follow replacement chain from child to get final node. + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + // Follow the replacement chain from the child to find the + // final sibling's Node (e.g., yamlBlockList → yamlBlockElem chain). + child := r.Child + if child != nil && child != jsonic.NoRule { + final := child + for final.Next != nil && final.Next != jsonic.NoRule && + final.Next.Prev == final { + final = final.Next + } + if final != child && !jsonic.IsUndefined(final.Node) { + r.Node = final.Node + } + } + }) + + // Before close: handle empty values. + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if _, ok := r.U["yamlEmpty"]; ok { + r.Node = jsonic.Undefined + } + }) + + // Close on indent tokens. + rs.PrependClose( + &jsonic.AltSpec{S: [][]jsonic.Tin{{IN}}, B: 1}, + ) + + // After close: resolve aliases and record anchors. + rs.AddAC(func(r *jsonic.Rule, ctx *jsonic.Context) { + // Resolve alias markers. + if m, ok := r.Node.(map[string]any); ok { + if alias, ok := m["__yamlAlias"].(string); ok { + val, exists := anchors[alias] + if exists { + switch v := val.(type) { + case map[string]any, []any: + r.Node = deepCopy(v) + default: + r.Node = val + } + } + } + } + + // Record anchors. + if anchorList, ok := r.U["yamlAnchors"]; ok { + anchorsSlice, ok := anchorList.([]anchorInfo) + if ok { + for _, anchor := range anchorsSlice { + if anchor.inline { + openNode := r.U["yamlAnchorOpenNode"] + if openNode != nil { + switch openNode.(type) { + case map[string]any, []any: + // Don't overwrite with final compound value. 
+ continue + } + } + } + val := r.Node + switch v := val.(type) { + case map[string]any, []any: + val = deepCopy(v) + } + anchors[anchor.name] = val + } + } + } + }) + }) + + // ===== indent rule ===== + j.Rule("indent", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.Open = []*jsonic.AltSpec{ + // Key pair → map. + {S: [][]jsonic.Tin{KEY, {CL}}, P: "map", B: 2}, + // Element → list. + {S: [][]jsonic.Tin{{EL}}, P: "list"}, + // Plain value after indent. + {S: [][]jsonic.Tin{KEY}, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if r.O0.Tin == ST || r.O0.Tin == TX { + r.Node = r.O0.Val + } else { + r.Node = r.O0.Src + } + }, + }, + } + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if !jsonic.IsUndefined(r.Child.Node) { + r.Node = r.Child.Node + } + }) + }) + + // ===== yamlBlockList rule ===== + j.Rule("yamlBlockList", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = make([]any, 0) + r.K["yamlBlockArr"] = r.Node + r.K["yamlListIn"] = r.N["in"] + }) + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{KEY, {CL}}, P: "yamlElemMap", B: 2, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.K["yamlMapIn"] = r.N["in"] + 2 + }, + }, + {P: "val"}, + } + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + val := r.Child.Node + if jsonic.IsUndefined(val) { + val = nil + } + if arr, ok := r.K["yamlBlockArr"].([]any); ok { + arr = append(arr, val) + r.K["yamlBlockArr"] = arr + r.Node = arr + } + }) + rs.Close = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{IN}, {EL}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v == r.N["in"] + } + return false + }, + R: "yamlBlockElem", + }, + {S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v <= r.N["in"] + } + return false + }, + B: 1, + }, + {S: [][]jsonic.Tin{{EL}}, R: "yamlBlockElem"}, + {S: [][]jsonic.Tin{{ZZ}}, B: 1}, + } + }) + + // ===== 
yamlBlockElem rule ===== + j.Rule("yamlBlockElem", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = r.K["yamlBlockArr"] + }) + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{KEY, {CL}}, P: "yamlElemMap", B: 2, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.K["yamlMapIn"] = r.N["in"] + 2 + }, + }, + {P: "val"}, + } + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + val := r.Child.Node + if jsonic.IsUndefined(val) { + val = nil + } + if arr, ok := r.K["yamlBlockArr"].([]any); ok { + arr = append(arr, val) + r.K["yamlBlockArr"] = arr + r.Node = arr + } + }) + rs.Close = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{IN}, {EL}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v == r.N["in"] + } + return false + }, + R: "yamlBlockElem", + }, + {S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v <= r.N["in"] + } + return false + }, + B: 1, + }, + {S: [][]jsonic.Tin{{EL}}, R: "yamlBlockElem"}, + {S: [][]jsonic.Tin{{ZZ}}, B: 1}, + } + }) + + // ===== list rule amendments ===== + j.Rule("list", func(rs *jsonic.RuleSpec) { + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.K["yamlListIn"] = r.N["in"] + }) + rs.PrependClose( + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v <= r.N["in"] + } + return false + }, + B: 1, + }, + ) + }) + + // ===== map rule amendments ===== + j.Rule("map", func(rs *jsonic.RuleSpec) { + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + if _, ok := r.N["in"]; !ok { + r.N["in"] = 0 + } + r.K["yamlIn"] = r.N["in"] + }) + rs.PrependOpen( + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(r.O0.Val); ok { + return v == r.N["in"] + } + return false + }, + R: "pair", + }, + ) + // 
Handle merge keys. + rs.AddAC(func(r *jsonic.Rule, ctx *jsonic.Context) { + m, ok := r.Node.(map[string]any) + if !ok { + return + } + mergeVal, hasMerge := m["<<"] + if !hasMerge { + return + } + delete(m, "<<") + switch mv := mergeVal.(type) { + case []any: + for _, item := range mv { + if mm, ok := item.(map[string]any); ok { + for k, v := range mm { + if _, exists := m[k]; !exists { + m[k] = v + } + } + } + } + case map[string]any: + for k, v := range mv { + if _, exists := m[k]; !exists { + m[k] = v + } + } + } + }) + rs.PrependClose( + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v < r.N["in"] + } + return false + }, + B: 1, + }, + ) + }) + + // ===== pair rule amendments ===== + j.Rule("pair", func(rs *jsonic.RuleSpec) { + rs.PrependOpen( + &jsonic.AltSpec{S: [][]jsonic.Tin{{ZZ}}, B: 1}, + ) + rs.PrependClose( + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v == r.N["in"] + } + return false + }, + R: "pair", + }, + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v < r.N["in"] + } + return false + }, + B: 1, + }, + ) + }) + + // ===== yamlElemMap rule ===== + j.Rule("yamlElemMap", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = make(map[string]any) + }) + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{KEY, {CL}}, P: "val", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.U["key"] = extractKey(r.O0, anchors) + }, + }, + } + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if key := r.U["key"]; key != nil { + if m, ok := r.Node.(map[string]any); ok { + val := r.Child.Node + if jsonic.IsUndefined(val) { + val = nil + } + m[formatKey(key)] = val + } + } + }) + rs.Close = []*jsonic.AltSpec{ + {S: 
[][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + if mapIn, ok := toInt(r.K["yamlMapIn"]); ok { + return v == mapIn + } + } + return false + }, + R: "yamlElemPair", + }, + {S: [][]jsonic.Tin{{IN}}, B: 1}, + {S: [][]jsonic.Tin{{CA}}, B: 1}, + {S: [][]jsonic.Tin{{CS}}, B: 1}, + {S: [][]jsonic.Tin{{CB}}, B: 1}, + {S: [][]jsonic.Tin{{ZZ}}}, + } + }) + + // ===== yamlElemPair rule ===== + j.Rule("yamlElemPair", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{KEY, {CL}}, P: "val", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.U["key"] = extractKey(r.O0, anchors) + }, + }, + } + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if key := r.U["key"]; key != nil { + if m, ok := r.Node.(map[string]any); ok { + val := r.Child.Node + if jsonic.IsUndefined(val) { + val = nil + } + m[formatKey(key)] = val + } + } + }) + rs.Close = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + if mapIn, ok := toInt(r.K["yamlMapIn"]); ok { + return v == mapIn + } + } + return false + }, + R: "yamlElemPair", + }, + {S: [][]jsonic.Tin{{IN}}, B: 1}, + {S: [][]jsonic.Tin{{CA}}, B: 1}, + {S: [][]jsonic.Tin{{CS}}, B: 1}, + {S: [][]jsonic.Tin{{CB}}, B: 1}, + {S: [][]jsonic.Tin{{ZZ}}}, + } + }) + + // ===== elem rule amendments ===== + j.Rule("elem", func(rs *jsonic.RuleSpec) { + rs.PrependOpen( + &jsonic.AltSpec{S: [][]jsonic.Tin{KEY, {CL}}, P: "yamlElemMap", B: 2, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + r.K["yamlMapIn"] = r.N["in"] + 2 + }, + }, + ) + rs.PrependClose( + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}, {EL}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v == r.N["in"] + } + return false + }, + R: "elem", + }, + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if 
v, ok := toInt(ctx.T0.Val); ok { + return v == r.N["in"] + } + return false + }, + B: 1, + }, + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{IN}}, + C: func(r *jsonic.Rule, ctx *jsonic.Context) bool { + if v, ok := toInt(ctx.T0.Val); ok { + return v < r.N["in"] + } + return false + }, + B: 1, + }, + &jsonic.AltSpec{S: [][]jsonic.Tin{{EL}}, R: "elem"}, + ) + }) +} + +// toInt converts an any value to int. +func toInt(v any) (int, bool) { + switch n := v.(type) { + case int: + return n, true + case float64: + return int(n), true + case int64: + return int(n), true + default: + return 0, false + } +} diff --git a/go/plugin.go b/go/plugin.go new file mode 100644 index 0000000..ddef5bc --- /dev/null +++ b/go/plugin.go @@ -0,0 +1,1967 @@ +package yaml + +import ( + "regexp" + "strconv" + "strings" + + jsonic "github.com/jsonicjs/jsonic/go" +) + +// Yaml is a jsonic plugin that adds YAML parsing support. +func Yaml(j *jsonic.Jsonic, _ map[string]any) { + TX := j.Token("#TX") + NR := j.Token("#NR") + ST := j.Token("#ST") + VL := j.Token("#VL") + CL := j.Token("#CL") + ZZ := j.Token("#ZZ") + CA := j.Token("#CA") + CS := j.Token("#CS") + CB := j.Token("#CB") + + // Register custom tokens. + IN := j.Token("#IN") // Indent token + EL := j.Token("#EL") // Element marker (- ) + + KEY := []jsonic.Tin{TX, NR, ST, VL} + + // Shared state for the plugin instance. + anchors := make(map[string]any) + var pendingAnchors []anchorInfo + pendingExplicitCL := false + var pendingTokens []*jsonic.Token + tagHandles := make(map[string]string) + + // Remove colon as a fixed token — YAML uses ": " (colon-space). + cfg := j.Config() + delete(cfg.FixedTokens, ":") + cfg.SortFixedTokens() + + // Add colon as an ender char so text tokens stop at ":". 
+ if cfg.EnderChars == nil { + cfg.EnderChars = make(map[rune]bool) + } + cfg.EnderChars[':'] = true + + // ===== TextCheck: handles block scalars, !!tags, and plain scalars ===== + cfg.TextCheck = func(lex *jsonic.Lex) *jsonic.LexCheckResult { + pnt := lex.Cursor() + src := lex.Src + fwd := src[pnt.SI:] + if len(fwd) == 0 { + return nil + } + ch := fwd[0] + + // Block scalar: | or > + if ch == '|' || ch == '>' { + return handleBlockScalar(lex, pnt, src, fwd, ch) + } + + // !!type tags in text check context + if ch == '!' && len(fwd) > 1 && fwd[1] == '!' { + return handleTagInTextCheck(lex, pnt, fwd, tagHandles) + } + + // Skip special chars that should be handled by other matchers. + if ch == '{' || ch == '}' || ch == '[' || ch == ']' || + ch == ',' || ch == '#' || ch == '\n' || ch == '\r' || + ch == '"' || ch == '\'' || ch == '*' || ch == '&' || ch == '!' { + return nil + } + + // Colon followed by space/tab/newline/eof is a separator, not text. + if ch == ':' && (len(fwd) < 2 || fwd[1] == ' ' || fwd[1] == '\t' || fwd[1] == '\n' || fwd[1] == '\r') { + return nil + } + + // Plain scalar — scan to end of line, handling multiline continuation. + return handlePlainScalar(lex, pnt, src, fwd) + } + + // ===== Custom YAML matcher (priority 500000 — before fixed tokens) ===== + srcCleaned := false + + j.AddMatcher("yaml", 500000, func(lex *jsonic.Lex) *jsonic.Token { + pnt := lex.Cursor() + src := lex.Src + + // First call: clean source (strip directives, initial ---). + if !srcCleaned { + srcCleaned = true + cleaned := cleanSource(src, tagHandles) + if cleaned != src { + lex.Src = cleaned + pnt.Len = len(cleaned) + } + } + + if pnt.SI >= pnt.Len { + return nil + } + + // Emit pending tokens (from explicit key handling). + if len(pendingTokens) > 0 { + tkn := pendingTokens[0] + pendingTokens = pendingTokens[1:] + return tkn + } + + // Emit pending explicit CL token. 
+ if pendingExplicitCL { + pendingExplicitCL = false + tkn := lex.Token("#CL", CL, 1, ": ") + return tkn + } + + fwd := lex.Src[pnt.SI:] + if len(fwd) == 0 { + return nil + } + + // Process YAML features in a loop to handle chaining. + for { + if pnt.SI >= pnt.Len { + return nil + } + fwd = lex.Src[pnt.SI:] + if len(fwd) == 0 { + return nil + } + + // Alias: *name + if fwd[0] == '*' { + nameEnd := 1 + for nameEnd < len(fwd) && fwd[nameEnd] != ' ' && fwd[nameEnd] != '\t' && + fwd[nameEnd] != '\n' && fwd[nameEnd] != '\r' && fwd[nameEnd] != ',' && + fwd[nameEnd] != '{' && fwd[nameEnd] != '}' && fwd[nameEnd] != '[' && + fwd[nameEnd] != ']' { + nameEnd++ + } + aliasName := fwd[1:nameEnd] + if val, ok := anchors[aliasName]; ok { + var tkn *jsonic.Token + switch v := val.(type) { + case string: + tkn = lex.Token("#TX", TX, v, fwd[:nameEnd]) + case float64: + tkn = lex.Token("#NR", NR, v, fwd[:nameEnd]) + case bool: + tkn = lex.Token("#VL", VL, v, fwd[:nameEnd]) + case nil: + tkn = lex.Token("#VL", VL, nil, fwd[:nameEnd]) + default: + // Complex value — use alias marker for later resolution. + tkn = lex.Token("#VL", VL, map[string]any{"__yamlAlias": aliasName}, fwd[:nameEnd]) + } + pnt.SI += nameEnd + pnt.CI += nameEnd + return tkn + } + // Unknown alias — return as marker. + tkn := lex.Token("#VL", VL, map[string]any{"__yamlAlias": aliasName}, fwd[:nameEnd]) + pnt.SI += nameEnd + pnt.CI += nameEnd + return tkn + } + + // Anchor: &name + if fwd[0] == '&' { + nameEnd := 1 + for nameEnd < len(fwd) && fwd[nameEnd] != ' ' && fwd[nameEnd] != '\t' && + fwd[nameEnd] != '\n' && fwd[nameEnd] != '\r' && fwd[nameEnd] != ',' && + fwd[nameEnd] != '{' && fwd[nameEnd] != '}' && fwd[nameEnd] != '[' && + fwd[nameEnd] != ']' { + nameEnd++ + } + anchorName := fwd[1:nameEnd] + anchorInline := true + + // Check if anchor is standalone (nothing meaningful after it on the line). 
+ afterAnchor := nameEnd + for afterAnchor < len(fwd) && (fwd[afterAnchor] == ' ' || fwd[afterAnchor] == '\t') { + afterAnchor++ + } + isStandalone := afterAnchor >= len(fwd) || fwd[afterAnchor] == '\n' || + fwd[afterAnchor] == '\r' || fwd[afterAnchor] == '#' + + if isStandalone { + anchorInline = false + } + + // Try to capture inline scalar value for the anchor. + if anchorInline && afterAnchor < len(fwd) { + peek := fwd[afterAnchor:] + var scalarVal any + pch := byte(0) + if len(peek) > 0 { + pch = peek[0] + } + if pch == '"' { + ei := 1 + for ei < len(peek) && peek[ei] != '"' { + if peek[ei] == '\\' { + ei++ + } + ei++ + } + raw := peek[1:ei] + raw = strings.ReplaceAll(raw, "\\n", "\n") + raw = strings.ReplaceAll(raw, "\\t", "\t") + raw = strings.ReplaceAll(raw, "\\\\", "\\") + raw = strings.ReplaceAll(raw, `\"`, `"`) + scalarVal = raw + } else if pch == '\'' { + ei := 1 + for ei < len(peek) && peek[ei] != '\'' { + if ei+1 < len(peek) && peek[ei] == '\'' && peek[ei+1] == '\'' { + ei++ + } + ei++ + } + raw := peek[1:ei] + raw = strings.ReplaceAll(raw, "''", "'") + scalarVal = raw + } else if pch != 0 && pch != '{' && pch != '[' && pch != '\n' && pch != '\r' { + ei := 0 + for ei < len(peek) && peek[ei] != '\n' && peek[ei] != '\r' && + peek[ei] != ',' && peek[ei] != '}' && peek[ei] != ']' { + if peek[ei] == ':' && (ei+1 >= len(peek) || peek[ei+1] == ' ' || + peek[ei+1] == '\t' || peek[ei+1] == '\n' || peek[ei+1] == '\r') { + break + } + if peek[ei] == ' ' && ei+1 < len(peek) && peek[ei+1] == '#' { + break + } + ei++ + } + raw := strings.TrimRight(peek[:ei], " \t") + if len(raw) > 0 { + scalarVal = raw + } + } + if scalarVal != nil { + anchors[anchorName] = scalarVal + } + } + + pendingAnchors = append(pendingAnchors, anchorInfo{name: anchorName, inline: anchorInline}) + + // Consume the anchor name (and trailing spaces, but NOT the newline). 
+ skip := nameEnd + for skip < len(fwd) && (fwd[skip] == ' ' || fwd[skip] == '\t') { + skip++ + } + // Skip comments after anchor. + if skip < len(fwd) && fwd[skip] == '#' { + for skip < len(fwd) && fwd[skip] != '\n' && fwd[skip] != '\r' { + skip++ + } + } + pnt.SI += skip + pnt.CI += skip + + continue // Re-loop to process what follows the anchor + } + + // Directive lines (%YAML, %TAG, etc.): skip to --- + if fwd[0] == '%' { + pos := 0 + for pos < len(fwd) { + if isDocMarker(fwd, pos) { + break + } + for pos < len(fwd) && fwd[pos] != '\n' && fwd[pos] != '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\n' { + pos++ + } + pnt.RI++ + } + pnt.SI += pos + pnt.CI = 0 + continue + } + + // Non-specific tag: ! value + if fwd[0] == '!' && len(fwd) > 1 && fwd[1] != '!' { + if fwd[1] == ' ' { + // Non-specific tag: ! value + valStart := 2 + valEnd := valStart + for valEnd < len(fwd) && fwd[valEnd] != '\n' && fwd[valEnd] != '\r' { + valEnd++ + } + rawVal := trimRight(fwd[valStart:valEnd]) + tkn := lex.Token("#TX", TX, rawVal, fwd[:valEnd]) + pnt.SI += valEnd + pnt.CI += valEnd + return tkn + } + // Local tag: !name value — skip the tag. + tagEnd := 1 + for tagEnd < len(fwd) && fwd[tagEnd] != ' ' && fwd[tagEnd] != '\n' && fwd[tagEnd] != '\r' { + tagEnd++ + } + if tagEnd < len(fwd) && fwd[tagEnd] == ' ' { + tagEnd++ + } + pnt.SI += tagEnd + pnt.CI += tagEnd + // If tag is standalone, consume newline + spaces. 
+ if pnt.SI < pnt.Len && (lex.Src[pnt.SI] == '\n' || lex.Src[pnt.SI] == '\r') { + tagStandalone := true + tagLineIndent := 0 + tbi := pnt.SI - tagEnd - 1 + for tbi >= 0 && lex.Src[tbi] != '\n' && lex.Src[tbi] != '\r' { + if lex.Src[tbi] != ' ' && lex.Src[tbi] != '\t' { + tagStandalone = false + break + } + tagLineIndent++ + tbi-- + } + _ = tagLineIndent + if tagStandalone { + nl := pnt.SI + if nl < pnt.Len && lex.Src[nl] == '\r' { + nl++ + } + if nl < pnt.Len && lex.Src[nl] == '\n' { + nl++ + } + spaces := 0 + for nl+spaces < pnt.Len && lex.Src[nl+spaces] == ' ' { + spaces++ + } + pnt.SI = nl + spaces + pnt.CI = spaces + pnt.RI++ + } + } + continue + } + + // !!seq, !!map, !!omap, etc. structural tags — skip them. + structTagRe := regexp.MustCompile(`^!!(seq|map|omap|set|pairs|binary|ordered|python/\S*)`) + if fwd[0] == '!' && len(fwd) > 1 && fwd[1] == '!' && structTagRe.MatchString(fwd) { + skip := 2 + for skip < len(fwd) && fwd[skip] != ' ' && fwd[skip] != '\n' { + skip++ + } + for skip < len(fwd) && fwd[skip] == ' ' { + skip++ + } + // If standalone, consume newline. + tagIndent := 0 + tbi := pnt.SI - 1 + standalone := true + for tbi >= 0 && lex.Src[tbi] != '\n' && lex.Src[tbi] != '\r' { + if lex.Src[tbi] != ' ' && lex.Src[tbi] != '\t' { + standalone = false + break + } + tagIndent++ + tbi-- + } + if standalone && skip < len(fwd) && (fwd[skip] == '\n' || fwd[skip] == '\r') { + nl := skip + if nl < len(fwd) && fwd[nl] == '\r' { + nl++ + } + if nl < len(fwd) && fwd[nl] == '\n' { + nl++ + } + spaces := 0 + for nl+spaces < len(fwd) && fwd[nl+spaces] == ' ' { + spaces++ + } + if spaces >= tagIndent { + skip = nl + spaces + pnt.SI += skip + pnt.CI = spaces + pnt.RI++ + continue + } + } + pnt.SI += skip + pnt.CI += skip + continue + } + + // !!type tags (!!str, !!int, !!float, !!bool, !!null). + if fwd[0] == '!' && len(fwd) > 1 && fwd[1] == '!' { + return handleTypeTag(lex, pnt, fwd, tagHandles, &pendingAnchors, anchors, TX, NR, VL, ST) + } + + // Explicit key: ? 
key + if fwd[0] == '?' && (len(fwd) < 2 || fwd[1] == ' ' || fwd[1] == '\t' || + fwd[1] == '\n' || fwd[1] == '\r') { + return handleExplicitKey(lex, pnt, fwd, &pendingExplicitCL, &pendingTokens, TX, CL, VL) + } + + // Document markers: --- and ... + if isDocMarker(fwd, 0) { + return handleDocMarker(lex, pnt, fwd, IN, &pendingAnchors, anchors, TX) + } + + // Re-check patterns after --- fall-through. + if fwd[0] == '%' { + pos := 0 + for pos < len(fwd) { + if isDocMarker(fwd, pos) { + break + } + for pos < len(fwd) && fwd[pos] != '\n' && fwd[pos] != '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\n' { + pos++ + } + pnt.RI++ + } + pnt.SI += pos + pnt.CI = 0 + continue + } + + // Non-specific tag after ---. + if fwd[0] == '!' && len(fwd) > 1 && fwd[1] == ' ' { + valStart := 2 + valEnd := valStart + for valEnd < len(fwd) && fwd[valEnd] != '\n' && fwd[valEnd] != '\r' { + valEnd++ + } + rawVal := trimRight(fwd[valStart:valEnd]) + tkn := lex.Token("#TX", TX, rawVal, fwd[:valEnd]) + pnt.SI += valEnd + pnt.CI += valEnd + return tkn + } + + // Anchor after --- fall-through. + if fwd[0] == '&' { + continue // Will be handled at top of loop + } + + // YAML double-quoted string. + if fwd[0] == '"' { + return handleDoubleQuotedString(lex, pnt, fwd, ST) + } + + // YAML single-quoted string. + if fwd[0] == '\'' { + return handleSingleQuotedString(lex, pnt, fwd, ST) + } + + // Plain scalars starting with digits that contain colons (e.g. 20:03:20). 
+ if fwd[0] >= '0' && fwd[0] <= '9' { + if tkn := handleNumericColon(lex, pnt, fwd, TX); tkn != nil { + return tkn + } + } + + // Element marker: - (followed by space/tab/newline/eof) + if fwd[0] == '-' && (len(fwd) < 2 || fwd[1] == ' ' || fwd[1] == '\t' || + fwd[1] == '\n' || fwd[1] == '\r') { + tkn := lex.Token("#EL", EL, nil, "- ") + pnt.SI++ + pnt.CI++ + if len(fwd) > 1 && (fwd[1] == ' ' || fwd[1] == '\t') { + pnt.SI++ + pnt.CI++ + } + return tkn + } + + // YAML colon: ": ", ":\t", ":\n", ":" at end. + isFlowColon := false + if fwd[0] == ':' && len(fwd) > 1 && fwd[1] != ' ' && fwd[1] != '\t' && + fwd[1] != '\n' && fwd[1] != '\r' { + prevI := pnt.SI - 1 + for prevI >= 0 && (lex.Src[prevI] == ' ' || lex.Src[prevI] == '\t' || + lex.Src[prevI] == '\n' || lex.Src[prevI] == '\r') { + prevI-- + } + if prevI >= 0 && (lex.Src[prevI] == '"' || lex.Src[prevI] == '\'') { + isFlowColon = true + } + } + if fwd[0] == ':' && (len(fwd) < 2 || fwd[1] == ' ' || fwd[1] == '\t' || + fwd[1] == '\n' || fwd[1] == '\r' || isFlowColon) { + tkn := lex.Token("#CL", CL, 1, ": ") + pnt.SI++ + if len(fwd) > 1 && (fwd[1] == ' ' || fwd[1] == '\t') { + pnt.CI += 2 + } else if len(fwd) > 1 && (fwd[1] == '\n' || fwd[1] == '\r') { + // Don't consume newline. + } else { + pnt.CI++ + } + return tkn + } + + // Newline handling — YAML indentation is significant. + if fwd[0] == '\n' || fwd[0] == '\r' { + // Check if we're inside a flow collection. + inFlow := 0 + for fi := 0; fi < pnt.SI; fi++ { + fc := lex.Src[fi] + if fc == '{' || fc == '[' { + inFlow++ + } else if fc == '}' || fc == ']' { + if inFlow > 0 { + inFlow-- + } + } else if fc == '"' { + fi++ + for fi < pnt.SI && lex.Src[fi] != '"' { + if lex.Src[fi] == '\\' { + fi++ + } + fi++ + } + } else if fc == '\'' { + fi++ + for fi < pnt.SI && lex.Src[fi] != '\'' { + if fi+1 < pnt.SI && lex.Src[fi] == '\'' && lex.Src[fi+1] == '\'' { + fi++ + } + fi++ + } + } + } + if inFlow > 0 { + // Inside flow collection — consume whitespace. 
+ pos := 0 + for pos < len(fwd) && (fwd[pos] == '\n' || fwd[pos] == '\r' || + fwd[pos] == ' ' || fwd[pos] == '\t') { + pos++ + } + if pos < len(fwd) && fwd[pos] == '#' { + for pos < len(fwd) && fwd[pos] != '\n' && fwd[pos] != '\r' { + pos++ + } + } + pnt.SI += pos + pnt.CI = 0 + continue + } + + // Block context newline — emit #IN with indent level. + pos := 0 + spaces := 0 + rows := 0 + for pos < len(fwd) { + if fwd[pos] == '\r' && pos+1 < len(fwd) && fwd[pos+1] == '\n' { + pos += 2 + rows++ + } else if fwd[pos] == '\n' { + pos++ + rows++ + } else { + break + } + spaces = 0 + for pos < len(fwd) && fwd[pos] == ' ' { + pos++ + spaces++ + } + // Comment-only line — skip. + if pos < len(fwd) && fwd[pos] == '#' { + for pos < len(fwd) && fwd[pos] != '\n' && fwd[pos] != '\r' { + pos++ + } + continue + } + // Tab-only line — skip. + if pos < len(fwd) && fwd[pos] == '\t' { + tp := pos + for tp < len(fwd) && (fwd[tp] == ' ' || fwd[tp] == '\t') { + tp++ + } + if tp >= len(fwd) || fwd[tp] == '\n' || fwd[tp] == '\r' { + pos = tp + continue + } + } + // Anchor-only line. + if pos < len(fwd) && fwd[pos] == '&' { + ae := pos + 1 + for ae < len(fwd) && fwd[ae] != ' ' && fwd[ae] != '\t' && + fwd[ae] != '\n' && fwd[ae] != '\r' { + ae++ + } + afterAnchor := ae + for afterAnchor < len(fwd) && (fwd[afterAnchor] == ' ' || fwd[afterAnchor] == '\t') { + afterAnchor++ + } + if afterAnchor >= len(fwd) || fwd[afterAnchor] == '\n' || + fwd[afterAnchor] == '\r' || fwd[afterAnchor] == '#' { + pendingAnchors = append(pendingAnchors, anchorInfo{name: fwd[pos+1 : ae], inline: false}) + for afterAnchor < len(fwd) && fwd[afterAnchor] != '\n' && fwd[afterAnchor] != '\r' { + afterAnchor++ + } + pos = afterAnchor + continue + } + } + } + + // Consumed everything — emit ZZ. + if pos >= len(fwd) { + pnt.SI += pos + pnt.RI += rows + pnt.CI = spaces + 1 + tkn := lex.Token("#ZZ", ZZ, jsonic.Undefined, "") + return tkn + } + + // Emit #IN with indent level. 
+ tkn := lex.Token("#IN", IN, spaces, fwd[:pos]) + pnt.SI += pos + pnt.RI += rows + pnt.CI = spaces + 1 + return tkn + } + + break // End of yamlMatchLoop + } + + return nil + }) + + // ===== Grammar rules ===== + configureGrammarRules(j, IN, EL, KEY, CL, ZZ, CA, CS, CB, TX, ST, VL, NR, + anchors, &pendingAnchors) +} + +// cleanSource strips YAML directives and initial document markers from source. +func cleanSource(src string, tagHandles map[string]string) string { + if len(src) == 0 { + return src + } + + // Remove leading directive block. + if src[0] == '%' { + dIdx := strings.Index(src, "\n---") + if dIdx >= 0 { + dirBlock := src[:dIdx] + for _, dl := range strings.Split(dirBlock, "\n") { + tagMatch := regexp.MustCompile(`^%TAG\s+(\S+)\s+(\S+)`).FindStringSubmatch(dl) + if tagMatch != nil { + tagHandles[tagMatch[1]] = tagMatch[2] + } + } + src = src[dIdx+1:] + } + } + + // Strip leading comment lines before ---. + for { + commentRe := regexp.MustCompile(`^[ \t]*#[^\n]*\n`) + if !commentRe.MatchString(src) || !strings.Contains(src, "\n---") { + break + } + src = commentRe.ReplaceAllString(src, "") + } + + // Handle document start marker (---). + docRe := regexp.MustCompile(`^---([ \t]+(.+))?(\r?\n|$)`) + docMatch := docRe.FindStringSubmatch(src) + if docMatch != nil { + prefix := "" + if len(docMatch) > 2 { + prefix = docMatch[2] + } + rest := src[len(docMatch[0]):] + trimmed := strings.TrimLeft(prefix, " \t") + + if len(trimmed) > 0 && (trimmed[0] == '>' || trimmed[0] == '|') { + // Leave --- in place for block scalar context. + } else if prefix != "" && (len(trimmed) == 0 || trimmed[0] != '#') { + structTagRe := regexp.MustCompile(`^!!(seq|map|omap|set|pairs|binary|ordered)\s*$`) + if structTagRe.MatchString(trimmed) { + src = rest + } else { + suffix := "" + if len(docMatch) > 3 { + suffix = docMatch[3] + } + src = prefix + suffix + rest + } + } else { + src = rest + } + } + + // Handle document end marker (... at end of source). 
+ dotRe := regexp.MustCompile(`\n\.\.\.\s*(\r?\n.*)?$`) + if dotRe.MatchString(src) { + loc := dotRe.FindStringIndex(src) + if loc != nil { + src = src[:loc[0]] + } + } + + return src +} + +// handleBlockScalar processes | and > block scalar indicators. +func handleBlockScalar(lex *jsonic.Lex, pnt *jsonic.Point, src, fwd string, ch byte) *jsonic.LexCheckResult { + fold := ch == '>' + chomp := "clip" + explicitIndent := 0 + idx := 1 + + // Parse chomping and indent indicators. + for pi := 0; pi < 2 && idx < len(fwd); pi++ { + if fwd[idx] == '+' { + chomp = "keep" + idx++ + } else if fwd[idx] == '-' { + chomp = "strip" + idx++ + } else if fwd[idx] >= '1' && fwd[idx] <= '9' { + explicitIndent = int(fwd[idx] - '0') + idx++ + } + } + + // Skip trailing spaces and comments. + for idx < len(fwd) && fwd[idx] == ' ' { + idx++ + } + if idx < len(fwd) && fwd[idx] == '#' { + for idx < len(fwd) && fwd[idx] != '\n' && fwd[idx] != '\r' { + idx++ + } + } + + // Must be followed by newline or eof. + if idx < len(fwd) && fwd[idx] != '\n' && fwd[idx] != '\r' { + return nil // Not a block scalar. + } + + // Skip the indicator line's newline. + if idx < len(fwd) && fwd[idx] == '\r' { + idx++ + } + if idx < len(fwd) && fwd[idx] == '\n' { + idx++ + } + + // Determine block indent. + blockIndent := 0 + if explicitIndent == 0 { + // Auto-detect from first content line. + tempIdx := idx + for tempIdx < len(fwd) { + lineSpaces := 0 + for tempIdx+lineSpaces < len(fwd) && fwd[tempIdx+lineSpaces] == ' ' { + lineSpaces++ + } + afterSpaces := tempIdx + lineSpaces + if afterSpaces >= len(fwd) || fwd[afterSpaces] == '\n' || fwd[afterSpaces] == '\r' { + tempIdx = afterSpaces + if tempIdx < len(fwd) && fwd[tempIdx] == '\r' { + tempIdx++ + } + if tempIdx < len(fwd) && fwd[tempIdx] == '\n' { + tempIdx++ + } + continue + } + blockIndent = lineSpaces + break + } + } + + // Determine containing indent. 
+ containingIndent := 0 + isDocStart := false + li := pnt.SI - 1 + for li > 0 && src[li-1] != '\n' && src[li-1] != '\r' { + li-- + } + lineStart := li + for li < pnt.SI && src[li] == ' ' { + containingIndent++ + li++ + } + if lineStart+2 < len(src) && src[lineStart] == '-' && src[lineStart+1] == '-' && src[lineStart+2] == '-' { + isDocStart = true + } + + // Apply explicit indent. + if explicitIndent > 0 { + hasColonOnLine := false + for ci := lineStart + containingIndent; ci < pnt.SI; ci++ { + if src[ci] == ':' && ci+1 < len(src) && (src[ci+1] == ' ' || src[ci+1] == '\t') { + hasColonOnLine = true + break + } + } + keyCol := containingIndent + if hasColonOnLine { + scanI := lineStart + containingIndent + for scanI < pnt.SI && src[scanI] == '-' && + scanI+1 < len(src) && (src[scanI+1] == ' ' || src[scanI+1] == '\t') { + keyCol += 2 + scanI += 2 + for scanI < pnt.SI && src[scanI] == ' ' { + keyCol++ + scanI++ + } + } + blockIndent = keyCol + explicitIndent + } else { + parentIndent := 0 + searchI := lineStart - 1 + if searchI > 0 { + if src[searchI] == '\n' { + searchI-- + } + if searchI > 0 && src[searchI] == '\r' { + searchI-- + } + prevLineEnd := searchI + 1 + for searchI > 0 && src[searchI-1] != '\n' && src[searchI-1] != '\r' { + searchI-- + } + prevLineStart := searchI + for ci := prevLineStart; ci < prevLineEnd; ci++ { + if src[ci] == ':' && (ci+1 >= prevLineEnd || src[ci+1] == ' ' || + src[ci+1] == '\t' || src[ci+1] == '\n' || src[ci+1] == '\r') { + parentIndent = 0 + pi := prevLineStart + for pi < prevLineEnd && src[pi] == ' ' { + parentIndent++ + pi++ + } + break + } + } + } + blockIndent = parentIndent + explicitIndent + containingIndent = parentIndent + } + } + + if blockIndent <= containingIndent && !isDocStart && idx < len(fwd) { + // Content is not indented enough — empty block scalar. 
+ var val string + if chomp == "keep" { + blankCount := 0 + bi := idx + for bi < len(fwd) { + if fwd[bi] == '\n' { + blankCount++ + bi++ + } else if fwd[bi] == '\r' { + bi++ + if bi < len(fwd) && fwd[bi] == '\n' { + bi++ + } + blankCount++ + } else { + break + } + } + if blankCount > 0 { + val = strings.Repeat("\n", blankCount) + } else { + val = "\n" + } + idx = bi + } else { + val = "" + } + tkn := lex.Token("#TX", jsonic.TinTX, val, fwd[:idx]) + pnt.SI += idx + pnt.RI++ + pnt.CI = 0 + return &jsonic.LexCheckResult{Done: true, Token: tkn} + } + + // Collect indented lines. + var lines []string + pos := idx + rows := 1 + lastNewlinePos := idx + for pos < len(fwd) { + lineIndent := 0 + for pos+lineIndent < len(fwd) && fwd[pos+lineIndent] == ' ' { + lineIndent++ + } + afterSpaces := pos + lineIndent + if afterSpaces >= len(fwd) || fwd[afterSpaces] == '\n' || fwd[afterSpaces] == '\r' { + if lineIndent > blockIndent { + lines = append(lines, fwd[pos+blockIndent:afterSpaces]) + } else { + lines = append(lines, "") + } + lastNewlinePos = afterSpaces + pos = afterSpaces + if pos < len(fwd) && fwd[pos] == '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\n' { + pos++ + } + rows++ + continue + } + if lineIndent < blockIndent { + break + } + if lineIndent == 0 && isDocMarker(fwd, pos) { + break + } + lineStartPos := pos + blockIndent + lineEnd := lineStartPos + for lineEnd < len(fwd) && fwd[lineEnd] != '\n' && fwd[lineEnd] != '\r' { + lineEnd++ + } + lines = append(lines, fwd[lineStartPos:lineEnd]) + lastNewlinePos = lineEnd + pos = lineEnd + if pos < len(fwd) && fwd[pos] == '\r' { + pos++ + } + if pos < len(fwd) && fwd[pos] == '\n' { + pos++ + } + rows++ + } + + // Build scalar value. + var val string + if fold { + val = foldLines(lines) + } else { + val = strings.Join(lines, "\n") + } + + // Apply chomping. 
+ if len(lines) == 0 { + val = "" + } else if chomp == "strip" { + val = strings.TrimRight(val, "\n") + } else if chomp == "clip" { + val = strings.TrimRight(val, "\n") + "\n" + } else { + // keep + val = val + "\n" + } + + // Don't consume final newline if more content follows. + endPos := pos + endRows := rows + if pos < len(fwd) && pos > lastNewlinePos { + ni := pos + nextLineIndent := 0 + for ni < len(fwd) && fwd[ni] == ' ' { + nextLineIndent++ + ni++ + } + isNextDocMarker := nextLineIndent == 0 && isDocMarker(fwd, ni) + if !isNextDocMarker { + endPos = lastNewlinePos + endRows = rows - 1 + } + } + + tkn := lex.Token("#TX", jsonic.TinTX, val, fwd[:endPos]) + pnt.SI += endPos + pnt.RI += endRows + pnt.CI = 0 + return &jsonic.LexCheckResult{Done: true, Token: tkn} +} + +// foldLines implements YAML folded scalar line joining. +func foldLines(lines []string) string { + var result strings.Builder + prevWasNormal := false + pendingEmptyCount := 0 + + for _, line := range lines { + isMore := len(line) > 0 && (line[0] == ' ' || line[0] == '\t') + isEmpty := line == "" + + if isEmpty { + pendingEmptyCount++ + } else if isMore { + if prevWasNormal && result.Len() > 0 { + result.WriteByte('\n') + } + for ei := 0; ei < pendingEmptyCount; ei++ { + result.WriteByte('\n') + } + pendingEmptyCount = 0 + if result.Len() > 0 { + s := result.String() + if s[len(s)-1] != '\n' { + result.WriteByte('\n') + } + } + result.WriteString(line) + result.WriteByte('\n') + prevWasNormal = false + } else { + if pendingEmptyCount > 0 { + if prevWasNormal && result.Len() > 0 { + result.WriteByte('\n') + for ei := 1; ei < pendingEmptyCount; ei++ { + result.WriteByte('\n') + } + } else { + for ei := 0; ei < pendingEmptyCount; ei++ { + result.WriteByte('\n') + } + } + pendingEmptyCount = 0 + } + if prevWasNormal && result.Len() > 0 { + s := result.String() + if s[len(s)-1] != '\n' { + result.WriteByte(' ') + } + } + result.WriteString(line) + prevWasNormal = true + } + } + for ei := 0; ei < 
pendingEmptyCount; ei++ { + result.WriteByte('\n') + } + return result.String() +} + +// handleTagInTextCheck processes !!type tags encountered in the text check callback. +func handleTagInTextCheck(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, tagHandles map[string]string) *jsonic.LexCheckResult { + tagEnd := 2 + for tagEnd < len(fwd) && fwd[tagEnd] != ' ' && fwd[tagEnd] != '\n' && fwd[tagEnd] != '\r' { + tagEnd++ + } + tag := fwd[2:tagEnd] + if tag == "seq" || tag == "map" { + return nil // Let yamlMatcher handle. + } + valStart := tagEnd + if valStart < len(fwd) && fwd[valStart] == ' ' { + valStart++ + } + rawVal := "" + valEnd := valStart + if valStart < len(fwd) && (fwd[valStart] == '"' || fwd[valStart] == '\'') { + q := fwd[valStart] + valEnd = valStart + 1 + for valEnd < len(fwd) && fwd[valEnd] != q { + if fwd[valEnd] == '\\' && q == '"' { + valEnd++ + } + valEnd++ + } + if valEnd < len(fwd) && fwd[valEnd] == q { + valEnd++ + } + rawVal = fwd[valStart+1 : valEnd-1] + } else { + for valEnd < len(fwd) && fwd[valEnd] != '\n' && fwd[valEnd] != '\r' { + if fwd[valEnd] == ':' && (valEnd+1 >= len(fwd) || fwd[valEnd+1] == ' ' || + fwd[valEnd+1] == '\n' || fwd[valEnd+1] == '\r') { + break + } + if fwd[valEnd] == ' ' && valEnd+1 < len(fwd) && fwd[valEnd+1] == '#' { + break + } + valEnd++ + } + rawVal = trimRight(fwd[valStart:valEnd]) + } + + result := applyTagConversion(tag, rawVal, tagHandles) + tknTin := jsonic.TinTX + switch result.(type) { + case float64: + tknTin = jsonic.TinNR + case bool, nil: + tknTin = jsonic.TinVL + } + if result == nil { + tknTin = jsonic.TinVL + } + + tkn := lex.Token(tinToName(tknTin), tknTin, result, fwd[:valEnd]) + pnt.SI += valEnd + pnt.CI += valEnd + return &jsonic.LexCheckResult{Done: true, Token: tkn} +} + +// handlePlainScalar processes YAML plain scalar values with multiline continuation. +func handlePlainScalar(lex *jsonic.Lex, pnt *jsonic.Point, src, fwd string) *jsonic.LexCheckResult { + // Detect flow context. 
+ inFlowCtx := false + depth := 0 + for fi := 0; fi < pnt.SI; fi++ { + fc := src[fi] + if fc == '{' || fc == '[' { + depth++ + } else if fc == '}' || fc == ']' { + if depth > 0 { + depth-- + } + } else if fc == '"' { + fi++ + for fi < pnt.SI && src[fi] != '"' { + if src[fi] == '\\' { + fi++ + } + fi++ + } + } else if fc == '\'' { + fi++ + for fi < pnt.SI && src[fi] != '\'' { + if fi+1 < pnt.SI && src[fi] == '\'' && src[fi+1] == '\'' { + fi++ + } + fi++ + } + } + } + inFlowCtx = depth > 0 + + // Find current line indent. + lineStartPos := pnt.SI + for lineStartPos > 0 && src[lineStartPos-1] != '\n' && src[lineStartPos-1] != '\r' { + lineStartPos-- + } + currentLineIndent := 0 + ci := lineStartPos + for ci < pnt.SI && src[ci] == ' ' { + currentLineIndent++ + ci++ + } + + // Check if text is preceded by ": " on the same line. + isMapValue := false + ci = pnt.SI - 1 + for ci >= lineStartPos && (src[ci] == ' ' || src[ci] == '\t') { + ci-- + } + if ci >= lineStartPos && src[ci] == ':' { + isMapValue = true + } + + minContinuationIndent := currentLineIndent + if isMapValue { + minContinuationIndent = currentLineIndent + 1 + } + + // Scan first line. + text := "" + i := 0 + totalConsumed := 0 + rows := 0 + + scanLine := func() string { + line := "" + for i < len(fwd) { + c := fwd[i] + if c == '\n' || c == '\r' { + break + } + if c == ':' && (i+1 >= len(fwd) || fwd[i+1] == ' ' || fwd[i+1] == '\t' || + fwd[i+1] == '\n' || fwd[i+1] == '\r') { + break + } + if (c == ' ' || c == '\t') && i+1 < len(fwd) && fwd[i+1] == '#' { + break + } + if inFlowCtx && (c == ']' || c == '}') { + break + } + if c == ',' && inFlowCtx { + break + } + line += string(c) + i++ + } + return trimRight(line) + } + + text = scanLine() + totalConsumed = i + + // Check for continuation lines (multiline plain scalars). 
+ for i < len(fwd) && (fwd[i] == '\n' || fwd[i] == '\r') { + nlPos := i + blankLines := 0 + for i < len(fwd) && (fwd[i] == '\n' || fwd[i] == '\r') { + if fwd[i] == '\r' { + i++ + } + if i < len(fwd) && fwd[i] == '\n' { + i++ + } + li := 0 + for i+li < len(fwd) && (fwd[i+li] == ' ' || fwd[i+li] == '\t') { + li++ + } + if i+li >= len(fwd) || fwd[i+li] == '\n' || fwd[i+li] == '\r' { + blankLines++ + i += li + continue + } + break + } + lineIndent := 0 + for i < len(fwd) && (fwd[i] == ' ' || fwd[i] == '\t') { + lineIndent++ + i++ + } + + isNextDocMarker := lineIndent == 0 && i < len(fwd) && isDocMarker(fwd, i) + isSeqMarker := false + if i < len(fwd) && fwd[i] == '-' && (i+1 >= len(fwd) || fwd[i+1] == ' ' || + fwd[i+1] == '\t' || fwd[i+1] == '\n' || fwd[i+1] == '\r') { + seqIndent := -1 + si := pnt.SI - 1 + for si >= lineStartPos { + if src[si] == '-' && (si+1 < len(src) && (src[si+1] == ' ' || src[si+1] == '\t')) { + seqIndent = si - lineStartPos + break + } + si-- + } + isSeqMarker = (seqIndent >= 0 && lineIndent == seqIndent) || + (seqIndent < 0 && lineIndent <= currentLineIndent) + } + + canContinue := false + if inFlowCtx { + canContinue = i < len(fwd) && fwd[i] != '\n' && fwd[i] != '\r' && + fwd[i] != '#' && fwd[i] != '{' && fwd[i] != '}' && + fwd[i] != '[' && fwd[i] != ']' + } else { + canContinue = lineIndent >= minContinuationIndent && i < len(fwd) && + fwd[i] != '\n' && fwd[i] != '\r' && fwd[i] != '#' && + !isNextDocMarker && !isSeqMarker + } + + if canContinue { + // Check if continuation line is a key-value pair. 
+ isKV := false + peekJ := i + for peekJ < len(fwd) && fwd[peekJ] != '\n' && fwd[peekJ] != '\r' { + if fwd[peekJ] == ':' && (peekJ+1 >= len(fwd) || fwd[peekJ+1] == ' ' || + fwd[peekJ+1] == '\t' || fwd[peekJ+1] == '\n' || fwd[peekJ+1] == '\r') { + isKV = true + break + } + if fwd[peekJ] == '}' || fwd[peekJ] == ']' || fwd[peekJ] == ',' { + break + } + peekJ++ + } + if !isKV || inFlowCtx { + contLine := scanLine() + if len(contLine) > 0 { + if blankLines > 0 { + for b := 0; b < blankLines; b++ { + text += "\n" + } + } else { + text += " " + } + text += contLine + totalConsumed = i + rows++ + continue + } + } + } + i = nlPos + break + } + + text = trimRight(text) + if len(text) == 0 { + return nil + } + + // Check if this is a YAML value keyword. + if val, ok := isYamlValue(text); ok { + tkn := lex.Token("#VL", jsonic.TinVL, val, text) + pnt.SI += len(text) + pnt.CI += len(text) + return &jsonic.LexCheckResult{Done: true, Token: tkn} + } + + // Check if it's a number. + if num, ok := parseYamlNumber(text); ok { + tkn := lex.Token("#NR", jsonic.TinNR, num, text) + pnt.SI += len(text) + pnt.CI += len(text) + return &jsonic.LexCheckResult{Done: true, Token: tkn} + } + + // Plain text. + tkn := lex.Token("#TX", jsonic.TinTX, text, fwd[:totalConsumed]) + pnt.SI += totalConsumed + pnt.RI += rows + pnt.CI += totalConsumed + return &jsonic.LexCheckResult{Done: true, Token: tkn} +} + +// handleTypeTag processes !!type tags (!!str, !!int, !!float, etc.). 
+func handleTypeTag(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, + tagHandles map[string]string, pendingAnchors *[]anchorInfo, + anchors map[string]any, TX, NR, VL, ST jsonic.Tin) *jsonic.Token { + + tagEnd := 2 + for tagEnd < len(fwd) && fwd[tagEnd] != ' ' && fwd[tagEnd] != '\n' && + fwd[tagEnd] != '\r' && fwd[tagEnd] != ',' && + fwd[tagEnd] != '}' && fwd[tagEnd] != ']' && fwd[tagEnd] != ':' { + tagEnd++ + } + tag := fwd[2:tagEnd] + valStart := tagEnd + if valStart < len(fwd) && fwd[valStart] == ' ' { + valStart++ + } + valEnd := valStart + + // Skip anchor before value. + tagAnchorName := "" + if valStart < len(fwd) && fwd[valStart] == '&' { + anchorEnd := valStart + 1 + for anchorEnd < len(fwd) && fwd[anchorEnd] != ' ' && fwd[anchorEnd] != '\n' && fwd[anchorEnd] != '\r' { + anchorEnd++ + } + tagAnchorName = fwd[valStart+1 : anchorEnd] + *pendingAnchors = append(*pendingAnchors, anchorInfo{name: tagAnchorName, inline: true}) + if anchorEnd < len(fwd) && fwd[anchorEnd] == ' ' { + anchorEnd++ + } + valStart = anchorEnd + valEnd = valStart + } + + // Check for quoted value. + if valStart < len(fwd) && (fwd[valStart] == '"' || fwd[valStart] == '\'') { + q := fwd[valStart] + valEnd = valStart + 1 + for valEnd < len(fwd) && fwd[valEnd] != q { + if fwd[valEnd] == '\\' && q == '"' { + valEnd++ + } + valEnd++ + } + if valEnd < len(fwd) && fwd[valEnd] == q { + valEnd++ + } + rawVal := fwd[valStart+1 : valEnd-1] + result := applyTagConversion(tag, rawVal, tagHandles) + if tagAnchorName != "" { + anchors[tagAnchorName] = result + } + tknTin := TX + switch result.(type) { + case float64: + tknTin = NR + case bool: + tknTin = VL + } + if result == nil { + tknTin = VL + } + tkn := lex.Token(tinToName(tknTin), tknTin, result, fwd[:valEnd]) + pnt.SI += valEnd + pnt.CI += valEnd + return tkn + } + + // Tag followed by newline — skip and let next cycle handle. 
+ if valStart < len(fwd) && (fwd[valStart] == '\n' || fwd[valStart] == '\r') && valStart < len(fwd)-1 { + nl := valStart + if nl < len(fwd) && fwd[nl] == '\r' { + nl++ + } + if nl < len(fwd) && fwd[nl] == '\n' { + nl++ + } + pnt.SI += nl + pnt.CI = 0 + pnt.RI++ + return nil // Will re-enter matcher + } + + // Unquoted value. + for valEnd < len(fwd) && fwd[valEnd] != '\n' && fwd[valEnd] != '\r' && + fwd[valEnd] != ',' && fwd[valEnd] != '}' && fwd[valEnd] != ']' { + if fwd[valEnd] == ':' && (valEnd+1 >= len(fwd) || fwd[valEnd+1] == ' ' || + fwd[valEnd+1] == '\n' || fwd[valEnd+1] == '\r') { + break + } + if fwd[valEnd] == ' ' && valEnd+1 < len(fwd) && fwd[valEnd+1] == '#' { + break + } + valEnd++ + } + rawVal := trimRight(fwd[valStart:valEnd]) + result := applyTagConversion(tag, rawVal, tagHandles) + if tagAnchorName != "" { + anchors[tagAnchorName] = result + } + tknTin := TX + switch result.(type) { + case string: + if result.(string) == "" { + tknTin = ST + } else { + tknTin = TX + } + case float64: + tknTin = NR + case bool: + tknTin = VL + } + if result == nil { + tknTin = VL + } + tkn := lex.Token(tinToName(tknTin), tknTin, result, fwd[:valEnd]) + pnt.SI += valEnd + pnt.CI += valEnd + return tkn +} + +// handleExplicitKey processes ? key\n: value patterns. +func handleExplicitKey(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, + pendingExplicitCL *bool, pendingTokens *[]*jsonic.Token, + TX, CL, VL jsonic.Tin) *jsonic.Token { + + start := 1 + if len(fwd) > 1 && (fwd[1] == ' ' || fwd[1] == '\t') { + start = 2 + } + + // Collect key text. + keyEnd := start + for keyEnd < len(fwd) && fwd[keyEnd] != '\n' && fwd[keyEnd] != '\r' { + if fwd[keyEnd] == ' ' && keyEnd+1 < len(fwd) && fwd[keyEnd+1] == '#' { + break + } + keyEnd++ + } + key := trimRight(fwd[start:keyEnd]) + consumed := keyEnd + + // Skip comment at end of key line. 
+ for consumed < len(fwd) && fwd[consumed] != '\n' && fwd[consumed] != '\r' { + consumed++ + } + beforeNewline := consumed + + // Consume newline. + if consumed < len(fwd) && fwd[consumed] == '\r' { + consumed++ + } + if consumed < len(fwd) && fwd[consumed] == '\n' { + consumed++ + } + + // Check for continuation lines. + qIndent := 0 + li := pnt.SI + for li > 0 && lex.Src[li-1] != '\n' && lex.Src[li-1] != '\r' { + li-- + } + for li < pnt.SI && lex.Src[li] == ' ' { + qIndent++ + li++ + } + + // Scan continuation lines (plain scalar multiline key). + for consumed < len(fwd) { + lineIndent := 0 + for consumed+lineIndent < len(fwd) && fwd[consumed+lineIndent] == ' ' { + lineIndent++ + } + afterSpaces := consumed + lineIndent + if afterSpaces < len(fwd) && fwd[afterSpaces] == '#' { + for afterSpaces < len(fwd) && fwd[afterSpaces] != '\n' && fwd[afterSpaces] != '\r' { + afterSpaces++ + } + beforeNewline = afterSpaces + if afterSpaces < len(fwd) && fwd[afterSpaces] == '\r' { + afterSpaces++ + } + if afterSpaces < len(fwd) && fwd[afterSpaces] == '\n' { + afterSpaces++ + } + consumed = afterSpaces + continue + } + if lineIndent > qIndent && afterSpaces < len(fwd) && + fwd[afterSpaces] != ':' && fwd[afterSpaces] != '?' && fwd[afterSpaces] != '-' { + contEnd := afterSpaces + for contEnd < len(fwd) && fwd[contEnd] != '\n' && fwd[contEnd] != '\r' { + if fwd[contEnd] == ' ' && contEnd+1 < len(fwd) && fwd[contEnd+1] == '#' { + break + } + contEnd++ + } + contText := trimRight(fwd[afterSpaces:contEnd]) + if len(contText) > 0 { + key += " " + contText + } + consumed = contEnd + beforeNewline = consumed + if consumed < len(fwd) && fwd[consumed] == '\r' { + consumed++ + } + if consumed < len(fwd) && fwd[consumed] == '\n' { + consumed++ + } + continue + } + break + } + + // Check if next line starts with ":". 
+ hasValue := false + valConsumed := consumed + ci := consumed + for ci < len(fwd) && fwd[ci] == ' ' { + ci++ + } + if ci < len(fwd) && fwd[ci] == ':' && + (ci+1 >= len(fwd) || fwd[ci+1] == ' ' || fwd[ci+1] == '\t' || + fwd[ci+1] == '\n' || fwd[ci+1] == '\r') { + hasValue = true + valConsumed = ci + 1 + if valConsumed < len(fwd) && (fwd[valConsumed] == ' ' || fwd[valConsumed] == '\t') { + valConsumed++ + } + } + + if hasValue { + pnt.SI += valConsumed + pnt.RI++ + pnt.CI = valConsumed - consumed + 1 + *pendingExplicitCL = true + } else { + pnt.SI += beforeNewline + pnt.CI += beforeNewline + clTkn := lex.Token("#CL", CL, 1, ": ") + vlTkn := lex.Token("#VL", VL, nil, "") + *pendingTokens = append(*pendingTokens, clTkn, vlTkn) + } + + tkn := lex.Token("#TX", TX, key, fwd[:keyEnd]) + return tkn +} + +// handleDocMarker processes --- and ... document markers. +func handleDocMarker(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, + IN jsonic.Tin, pendingAnchors *[]anchorInfo, anchors map[string]any, + TX jsonic.Tin) *jsonic.Token { + + pos := 3 + for pos < len(fwd) && fwd[pos] != '\n' && fwd[pos] != '\r' { + pos++ + } + + if fwd[0] == '.' { + // ... terminates document. + pnt.SI += pos + pnt.CI += pos + for pnt.SI < pnt.Len && (lex.Src[pnt.SI] == '\n' || lex.Src[pnt.SI] == '\r') { + if lex.Src[pnt.SI] == '\r' { + pnt.SI++ + } + if pnt.SI < pnt.Len && lex.Src[pnt.SI] == '\n' { + pnt.SI++ + } + pnt.RI++ + } + return lex.Token("#ZZ", jsonic.TinZZ, jsonic.Undefined, "") + } + + // --- handler. + afterDash := 3 + for afterDash < len(fwd) && fwd[afterDash] == ' ' { + afterDash++ + } + dashNextCh := byte(0) + if afterDash < len(fwd) { + dashNextCh = fwd[afterDash] + } + hasInlineValue := dashNextCh != 0 && dashNextCh != '\n' && dashNextCh != '\r' && + dashNextCh != '&' && dashNextCh != '!' && dashNextCh != '#' + + if hasInlineValue { + pnt.SI += afterDash + pnt.CI = afterDash + return nil // Fall through to continue matching. + } + + // Plain --- with nothing after it. 
+ pnt.SI += pos + pnt.RI++ + if pnt.SI < pnt.Len && lex.Src[pnt.SI] == '\r' { + pnt.SI++ + } + if pnt.SI < pnt.Len && lex.Src[pnt.SI] == '\n' { + pnt.SI++ + } + spaces := 0 + for pnt.SI+spaces < pnt.Len && lex.Src[pnt.SI+spaces] == ' ' { + spaces++ + } + pnt.SI += spaces + pnt.CI = spaces + + if pnt.SI >= pnt.Len { + return lex.Token("#ZZ", jsonic.TinZZ, jsonic.Undefined, "") + } + + nextCh := lex.Src[pnt.SI] + if nextCh == '{' || nextCh == '[' || nextCh == '"' || nextCh == '\'' { + return nil // Fall through. + } + if spaces == 0 && nextCh != '-' && nextCh != '.' && nextCh != '?' && + nextCh != '\n' && nextCh != '\r' { + return nil // Fall through. + } + + // Emit #IN with indent level. + return lex.Token("#IN", IN, spaces, fwd[:pos+1+spaces]) +} + +// handleDoubleQuotedString processes YAML double-quoted strings. +func handleDoubleQuotedString(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, ST jsonic.Tin) *jsonic.Token { + i := 1 + val := "" + escapedUpTo := 0 + + for i < len(fwd) && fwd[i] != '"' { + if fwd[i] == '\\' { + i++ + if i >= len(fwd) { + break + } + esc := fwd[i] + switch esc { + case 'n': + val += "\n" + i++ + escapedUpTo = len(val) + case 't': + val += "\t" + i++ + escapedUpTo = len(val) + case 'r': + val += "\r" + i++ + escapedUpTo = len(val) + case '"': + val += "\"" + i++ + escapedUpTo = len(val) + case '\\': + val += "\\" + i++ + escapedUpTo = len(val) + case '/': + val += "/" + i++ + escapedUpTo = len(val) + case 'b': + val += "\b" + i++ + escapedUpTo = len(val) + case 'f': + val += "\f" + i++ + escapedUpTo = len(val) + case 'a': + val += "\x07" + i++ + escapedUpTo = len(val) + case 'e': + val += "\x1b" + i++ + escapedUpTo = len(val) + case 'v': + val += "\v" + i++ + escapedUpTo = len(val) + case '0': + val += "\x00" + i++ + escapedUpTo = len(val) + case ' ': + val += " " + i++ + escapedUpTo = len(val) + case '_': + val += "\u00a0" + i++ + escapedUpTo = len(val) + case 'N': + val += "\u0085" + i++ + escapedUpTo = len(val) + case 'L': + val += 
"\u2028" + i++ + escapedUpTo = len(val) + case 'P': + val += "\u2029" + i++ + escapedUpTo = len(val) + case 'x': + if i+3 <= len(fwd) { + n, err := strconv.ParseInt(fwd[i+1:i+3], 16, 32) + if err == nil { + val += string(rune(n)) + i += 3 + escapedUpTo = len(val) + } else { + val += string(esc) + i++ + } + } else { + val += string(esc) + i++ + } + case 'u': + if i+5 <= len(fwd) { + n, err := strconv.ParseInt(fwd[i+1:i+5], 16, 32) + if err == nil { + val += string(rune(n)) + i += 5 + escapedUpTo = len(val) + } else { + val += string(esc) + i++ + } + } else { + val += string(esc) + i++ + } + case 'U': + if i+9 <= len(fwd) { + n, err := strconv.ParseInt(fwd[i+1:i+9], 16, 32) + if err == nil { + val += string(rune(n)) + i += 9 + escapedUpTo = len(val) + } else { + val += string(esc) + i++ + } + } else { + val += string(esc) + i++ + } + case '\n', '\r': + // Escaped newline: line continuation. + if esc == '\r' && i+1 < len(fwd) && fwd[i+1] == '\n' { + i++ + } + i++ + for i < len(fwd) && (fwd[i] == ' ' || fwd[i] == '\t') { + i++ + } + default: + val += string(esc) + i++ + } + } else if fwd[i] == '\n' || fwd[i] == '\r' { + // Flow scalar line folding. + trimTo := len(val) + for trimTo > escapedUpTo && (val[trimTo-1] == ' ' || val[trimTo-1] == '\t') { + trimTo-- + } + val = val[:trimTo] + emptyLines := 0 + for i < len(fwd) && (fwd[i] == '\n' || fwd[i] == '\r') { + if fwd[i] == '\r' { + i++ + } + if i < len(fwd) && fwd[i] == '\n' { + i++ + } + emptyLines++ + for i < len(fwd) && (fwd[i] == ' ' || fwd[i] == '\t') { + i++ + } + } + if emptyLines > 1 { + for e := 1; e < emptyLines; e++ { + val += "\n" + } + } else { + val += " " + } + } else { + val += string(fwd[i]) + i++ + } + } + if i < len(fwd) && fwd[i] == '"' { + i++ + } + tkn := lex.Token("#ST", ST, val, fwd[:i]) + pnt.SI += i + pnt.CI += i + return tkn +} + +// handleSingleQuotedString processes YAML single-quoted strings. 
+func handleSingleQuotedString(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, ST jsonic.Tin) *jsonic.Token { + i := 1 + val := "" + for i < len(fwd) { + if fwd[i] == '\'' { + if i+1 < len(fwd) && fwd[i+1] == '\'' { + val += "'" + i += 2 + } else { + i++ + break + } + } else if fwd[i] == '\n' || fwd[i] == '\r' { + // Flow scalar line folding. + val = strings.TrimRight(val, " \t") + emptyLines := 0 + for i < len(fwd) && (fwd[i] == '\n' || fwd[i] == '\r') { + if fwd[i] == '\r' { + i++ + } + if i < len(fwd) && fwd[i] == '\n' { + i++ + } + emptyLines++ + for i < len(fwd) && (fwd[i] == ' ' || fwd[i] == '\t') { + i++ + } + } + if emptyLines > 1 { + for e := 1; e < emptyLines; e++ { + val += "\n" + } + } else { + val += " " + } + } else { + val += string(fwd[i]) + i++ + } + } + tkn := lex.Token("#ST", ST, val, fwd[:i]) + pnt.SI += i + pnt.CI += i + return tkn +} + +// handleNumericColon handles plain scalars starting with digits that contain colons. +func handleNumericColon(lex *jsonic.Lex, pnt *jsonic.Point, fwd string, TX jsonic.Tin) *jsonic.Token { + hasEmbeddedColon := false + pi := 1 + for pi < len(fwd) && fwd[pi] != '\n' && fwd[pi] != '\r' { + if fwd[pi] == ':' && pi+1 < len(fwd) && fwd[pi+1] != ' ' && fwd[pi+1] != '\t' && + fwd[pi+1] != '\n' && fwd[pi+1] != '\r' { + hasEmbeddedColon = true + break + } + if fwd[pi] == ' ' || fwd[pi] == '\t' { + break + } + pi++ + } + if !hasEmbeddedColon { + return nil + } + end := 0 + for end < len(fwd) && fwd[end] != ' ' && fwd[end] != '\t' && + fwd[end] != '\n' && fwd[end] != '\r' { + end++ + } + text := fwd[:end] + tkn := lex.Token("#TX", TX, text, text) + pnt.SI += end + pnt.CI += end + return tkn +} + +// applyTagConversion applies !!type tag conversion to a raw value. +func applyTagConversion(tag, rawVal string, tagHandles map[string]string) any { + if _, ok := tagHandles["!!"]; ok { + return rawVal // Custom tag handle — don't apply built-in conversion. 
+ } + switch tag { + case "str": + return rawVal + case "int": + n, err := strconv.ParseInt(rawVal, 10, 64) + if err == nil { + return float64(n) + } + return rawVal + case "float": + n, err := strconv.ParseFloat(rawVal, 64) + if err == nil { + return n + } + return rawVal + case "bool": + return rawVal == "true" || rawVal == "True" || rawVal == "TRUE" + case "null": + return nil + default: + return rawVal + } +} + +// tinToName converts a Tin to its name string. +func tinToName(tin jsonic.Tin) string { + switch tin { + case jsonic.TinTX: + return "#TX" + case jsonic.TinNR: + return "#NR" + case jsonic.TinST: + return "#ST" + case jsonic.TinVL: + return "#VL" + case jsonic.TinOB: + return "#OB" + case jsonic.TinCB: + return "#CB" + case jsonic.TinOS: + return "#OS" + case jsonic.TinCS: + return "#CS" + case jsonic.TinCL: + return "#CL" + case jsonic.TinCA: + return "#CA" + case jsonic.TinZZ: + return "#ZZ" + default: + return "#UK" + } +} diff --git a/go/yaml.go b/go/yaml.go new file mode 100644 index 0000000..74b8b95 --- /dev/null +++ b/go/yaml.go @@ -0,0 +1,204 @@ +package yaml + +import ( + "encoding/json" + "fmt" + "math" + "strconv" + "strings" + + jsonic "github.com/jsonicjs/jsonic/go" +) + +// YamlOptions configures the YAML parser plugin. +// Currently empty — reserved for future extension. +type YamlOptions struct{} + +// Parse parses a YAML string and returns the resulting Go value. +// The returned value can be: +// - map[string]any for mappings +// - []any for sequences +// - float64 for numbers +// - string for strings +// - bool for booleans +// - nil for null or empty input +func Parse(src string) (any, error) { + j := MakeJsonic() + return j.Parse(src) +} + +// MakeJsonic creates a jsonic instance configured for YAML parsing. 
func MakeJsonic(opts ...YamlOptions) *jsonic.Jsonic {
	// NOTE(review): opts is accepted but never read in this function —
	// confirm it is intentionally reserved for future configuration.
	j := jsonic.Make(jsonic.Options{
		String: &jsonic.StringOptions{
			Chars: "`", // Remove single quote from string chars; we handle YAML strings in yamlMatcher
		},
		Lex: &jsonic.LexOptions{
			// Empty input parses to nil rather than an error.
			EmptyResult: nil,
		},
	})

	// Install the YAML plugin (lexer matchers + grammar rules).
	j.Use(Yaml, nil)
	return j
}

// yamlValueMap maps YAML value keywords to their Go values.
// Includes both YAML 1.2 core-schema spellings (true/false/null/.inf/.nan)
// and the YAML 1.1 boolean forms (yes/no/on/off).
var yamlValueMap = map[string]any{
	"true": true, "True": true, "TRUE": true,
	"false": false, "False": false, "FALSE": false,
	"null": nil, "Null": nil, "NULL": nil,
	"~": nil,
	"yes": true, "Yes": true, "YES": true,
	"no": false, "No": false, "NO": false,
	"on": true, "On": true, "ON": true,
	"off": false, "Off": false, "OFF": false,
	".inf": math.Inf(1), ".Inf": math.Inf(1), ".INF": math.Inf(1),
	"-.inf": math.Inf(-1), "-.Inf": math.Inf(-1), "-.INF": math.Inf(-1),
	".nan": math.NaN(), ".NaN": math.NaN(), ".NAN": math.NaN(),
}

// isYamlValue checks if text is a YAML value keyword and returns the value.
// The lookup is exact (case-sensitive per the map entries above).
func isYamlValue(text string) (any, bool) {
	val, ok := yamlValueMap[text]
	return val, ok
}

// parseYamlNumber attempts to parse text as a YAML number.
// Returns the number and true if successful, or 0 and false if not a number.
func parseYamlNumber(text string) (float64, bool) {
	if text == "" {
		return 0, false
	}
	// strconv.ParseFloat also accepts bare "inf", "infinity" and "nan"
	// (any case, with optional sign), but YAML only spells these as
	// ".inf"/".nan", which are resolved by the value-keyword map before
	// number parsing. Reject the bare word forms so they remain strings.
	bare := strings.ToLower(strings.TrimLeft(text, "+-"))
	if bare == "inf" || bare == "infinity" || bare == "nan" {
		return 0, false
	}
	// Standard decimal / scientific notation.
	if num, err := strconv.ParseFloat(text, 64); err == nil {
		return num, true
	}
	// Prefixed integer formats: hex, octal, binary.
	if strings.HasPrefix(text, "0x") || strings.HasPrefix(text, "0X") {
		if n, err := strconv.ParseInt(text[2:], 16, 64); err == nil {
			return float64(n), true
		}
	}
	if strings.HasPrefix(text, "0o") || strings.HasPrefix(text, "0O") {
		if n, err := strconv.ParseInt(text[2:], 8, 64); err == nil {
			return float64(n), true
		}
	}
	if strings.HasPrefix(text, "0b") || strings.HasPrefix(text, "0B") {
		if n, err := strconv.ParseInt(text[2:], 2, 64); err == nil {
			return float64(n), true
		}
	}
	// Signed hex/oct/bin: strip the sign and recurse.
	if len(text) > 1 && text[0] == '-' {
		if num, ok := parseYamlNumber(text[1:]); ok {
			return -num, true
		}
	}
	if len(text) > 1 && text[0] == '+' {
		return parseYamlNumber(text[1:])
	}
	return 0, false
}

// deepCopy performs a JSON-based deep copy of a value.
// Only maps and slices are copied; scalars (and nil) are returned as-is,
// since they are immutable from the caller's point of view. If the value
// cannot round-trip through JSON, the original is returned unchanged.
func deepCopy(v any) any {
	switch v.(type) {
	case map[string]any, []any:
		data, err := json.Marshal(v)
		if err != nil {
			return v
		}
		// Unmarshal into any: JSON objects/arrays decode back to
		// map[string]any and []any, matching the input's dynamic types.
		var result any
		if err := json.Unmarshal(data, &result); err != nil {
			return v
		}
		return result
	default:
		return v
	}
}

// extractKey extracts a key value from a token, resolving aliases.
+func extractKey(o0 *jsonic.Token, anchors map[string]any) any { + if o0.Tin == jsonic.TinVL { + if m, ok := o0.Val.(map[string]any); ok { + if alias, ok := m["__yamlAlias"].(string); ok { + if val, exists := anchors[alias]; exists { + return val + } + return "*" + alias + } + } + } + if o0.Tin == jsonic.TinST || o0.Tin == jsonic.TinTX { + if s, ok := o0.Val.(string); ok { + return s + } + } + return o0.Src +} + +// anchorInfo holds anchor metadata during parsing. +type anchorInfo struct { + name string + inline bool +} + +// isDocMarker checks if the string at position i starts with --- or ... +// followed by a space, tab, newline, or end of string. +func isDocMarker(s string, i int) bool { + if i+3 > len(s) { + return false + } + marker := s[i : i+3] + if marker != "---" && marker != "..." { + return false + } + if i+3 >= len(s) { + return true + } + next := s[i+3] + return next == '\n' || next == '\r' || next == ' ' || next == '\t' +} + +// trimRight removes trailing whitespace from a string. +func trimRight(s string) string { + return strings.TrimRight(s, " \t") +} + +// formatKey converts a value to a string suitable for use as a map key. +func formatKey(v any) string { + switch k := v.(type) { + case string: + return k + case float64: + if k == float64(int64(k)) { + return fmt.Sprintf("%d", int64(k)) + } + return fmt.Sprintf("%g", k) + case bool: + if k { + return "true" + } + return "false" + case nil: + return "null" + default: + return fmt.Sprintf("%v", v) + } +} diff --git a/go/yaml_test.go b/go/yaml_test.go new file mode 100644 index 0000000..ca17b03 --- /dev/null +++ b/go/yaml_test.go @@ -0,0 +1,630 @@ +package yaml + +import ( + "encoding/json" + "math" + "reflect" + "testing" +) + +// y is a helper that parses YAML and returns the result. 
+func y(t *testing.T, src string) any { + t.Helper() + result, err := Parse(src) + if err != nil { + t.Fatalf("Parse error: %v\nInput: %q", err, src) + } + return result +} + +// jsonNormalize round-trips through JSON to normalize types (e.g., int→float64). +func jsonNormalize(v any) any { + data, err := json.Marshal(v) + if err != nil { + return v + } + var out any + if err := json.Unmarshal(data, &out); err != nil { + return v + } + return out +} + +func expectEqual(t *testing.T, got, want any) { + t.Helper() + gotN := jsonNormalize(got) + wantN := jsonNormalize(want) + if !reflect.DeepEqual(gotN, wantN) { + gotJSON, _ := json.MarshalIndent(gotN, "", " ") + wantJSON, _ := json.MarshalIndent(wantN, "", " ") + t.Errorf("Mismatch:\nGot: %s\nWant: %s", gotJSON, wantJSON) + } +} + +// ===== BLOCK MAPPINGS ===== + +func TestSinglePair(t *testing.T) { + expectEqual(t, y(t, "a: 1"), map[string]any{"a": float64(1)}) +} + +func TestMultiplePairs(t *testing.T) { + expectEqual(t, y(t, "a: 1\nb: 2\nc: 3"), map[string]any{"a": float64(1), "b": float64(2), "c": float64(3)}) +} + +func TestNestedMap(t *testing.T) { + expectEqual(t, y(t, "a:\n b: 1\n c: 2"), map[string]any{"a": map[string]any{"b": float64(1), "c": float64(2)}}) +} + +func TestDeeplyNestedMap(t *testing.T) { + expectEqual(t, y(t, "a:\n b:\n c:\n d: 1"), + map[string]any{"a": map[string]any{"b": map[string]any{"c": map[string]any{"d": float64(1)}}}}) +} + +func TestSiblingNestedMaps(t *testing.T) { + expectEqual(t, y(t, "a:\n x: 1\nb:\n y: 2"), + map[string]any{"a": map[string]any{"x": float64(1)}, "b": map[string]any{"y": float64(2)}}) +} + +func TestEmptyValueFollowedBySibling(t *testing.T) { + expectEqual(t, y(t, "a:\nb: 1"), map[string]any{"a": nil, "b": float64(1)}) +} + +func TestColonAtEndOfLine(t *testing.T) { + expectEqual(t, y(t, "a:\n b: 1"), map[string]any{"a": map[string]any{"b": float64(1)}}) +} + +func TestTrailingNewline(t *testing.T) { + expectEqual(t, y(t, "a: 1\n"), map[string]any{"a": 
float64(1)}) +} + +// ===== BLOCK SEQUENCES ===== + +func TestSimpleList(t *testing.T) { + expectEqual(t, y(t, "- a\n- b\n- c"), []any{"a", "b", "c"}) +} + +func TestSingleElement(t *testing.T) { + expectEqual(t, y(t, "- a"), []any{"a"}) +} + +func TestNestedListInMap(t *testing.T) { + expectEqual(t, y(t, "items:\n - a\n - b"), map[string]any{"items": []any{"a", "b"}}) +} + +func TestListOfNumbers(t *testing.T) { + expectEqual(t, y(t, "- 1\n- 2\n- 3"), []any{float64(1), float64(2), float64(3)}) +} + +func TestListOfMaps(t *testing.T) { + expectEqual(t, y(t, "- name: alice\n- name: bob"), + []any{map[string]any{"name": "alice"}, map[string]any{"name": "bob"}}) +} + +func TestNestedListOfMapsMultikey(t *testing.T) { + expectEqual(t, y(t, "items:\n - name: alice\n age: 30\n - name: bob\n age: 25"), + map[string]any{"items": []any{ + map[string]any{"name": "alice", "age": float64(30)}, + map[string]any{"name": "bob", "age": float64(25)}, + }}) +} + +func TestDeeplyNestedList(t *testing.T) { + expectEqual(t, y(t, "a:\n b:\n - x\n - y"), + map[string]any{"a": map[string]any{"b": []any{"x", "y"}}}) +} + +func TestMixedMapThenList(t *testing.T) { + expectEqual(t, y(t, "a: 1\nb:\n - x\n - y\nc: 3"), + map[string]any{"a": float64(1), "b": []any{"x", "y"}, "c": float64(3)}) +} + +// ===== SCALAR TYPES ===== + +func TestInteger(t *testing.T) { + expectEqual(t, y(t, "a: 42"), map[string]any{"a": float64(42)}) +} + +func TestNegativeInteger(t *testing.T) { + expectEqual(t, y(t, "a: -7"), map[string]any{"a": float64(-7)}) +} + +func TestFloat(t *testing.T) { + expectEqual(t, y(t, "a: 3.14"), map[string]any{"a": float64(3.14)}) +} + +func TestZero(t *testing.T) { + expectEqual(t, y(t, "a: 0"), map[string]any{"a": float64(0)}) +} + +func TestBooleanTrue(t *testing.T) { + expectEqual(t, y(t, "a: true"), map[string]any{"a": true}) +} + +func TestBooleanFalse(t *testing.T) { + expectEqual(t, y(t, "a: false"), map[string]any{"a": false}) +} + +func TestNullKeyword(t *testing.T) { + 
expectEqual(t, y(t, "a: null"), map[string]any{"a": nil}) +} + +func TestTildeNull(t *testing.T) { + expectEqual(t, y(t, "a: ~"), map[string]any{"a": nil}) +} + +func TestEmptyValueNull(t *testing.T) { + expectEqual(t, y(t, "a:"), map[string]any{"a": nil}) +} + +func TestPlainString(t *testing.T) { + expectEqual(t, y(t, "a: hello world"), map[string]any{"a": "hello world"}) +} + +func TestOctalNumber(t *testing.T) { + expectEqual(t, y(t, "a: 0o77"), map[string]any{"a": float64(63)}) +} + +func TestHexNumber(t *testing.T) { + expectEqual(t, y(t, "a: 0xFF"), map[string]any{"a": float64(255)}) +} + +func TestPositiveInfinity(t *testing.T) { + result := y(t, "a: .inf") + m, ok := result.(map[string]any) + if !ok { + t.Fatalf("expected map, got %T", result) + } + v, ok := m["a"].(float64) + if !ok || !math.IsInf(v, 1) { + t.Errorf("expected +Inf, got %v", m["a"]) + } +} + +func TestNegativeInfinity(t *testing.T) { + result := y(t, "a: -.inf") + m, ok := result.(map[string]any) + if !ok { + t.Fatalf("expected map, got %T", result) + } + v, ok := m["a"].(float64) + if !ok || !math.IsInf(v, -1) { + t.Errorf("expected -Inf, got %v", m["a"]) + } +} + +func TestNaN(t *testing.T) { + result := y(t, "a: .nan") + m, ok := result.(map[string]any) + if !ok { + t.Fatalf("expected map, got %T", result) + } + v, ok := m["a"].(float64) + if !ok || !math.IsNaN(v) { + t.Errorf("expected NaN, got %v", m["a"]) + } +} + +func TestYesBoolean(t *testing.T) { + expectEqual(t, y(t, "a: yes"), map[string]any{"a": true}) +} + +func TestNoBoolean(t *testing.T) { + expectEqual(t, y(t, "a: no"), map[string]any{"a": false}) +} + +func TestOnBoolean(t *testing.T) { + expectEqual(t, y(t, "a: on"), map[string]any{"a": true}) +} + +func TestOffBoolean(t *testing.T) { + expectEqual(t, y(t, "a: off"), map[string]any{"a": false}) +} + +// ===== QUOTED STRINGS ===== + +func TestDoubleQuoted(t *testing.T) { + expectEqual(t, y(t, `a: "hello"`), map[string]any{"a": "hello"}) +} + +func TestSingleQuoted(t 
*testing.T) { + expectEqual(t, y(t, `a: 'hello'`), map[string]any{"a": "hello"}) +} + +func TestDoubleQuotedWithColon(t *testing.T) { + expectEqual(t, y(t, `a: "key: value"`), map[string]any{"a": "key: value"}) +} + +func TestSingleQuotedWithColon(t *testing.T) { + expectEqual(t, y(t, `a: 'key: value'`), map[string]any{"a": "key: value"}) +} + +func TestDoubleQuotedEmpty(t *testing.T) { + expectEqual(t, y(t, `a: ""`), map[string]any{"a": ""}) +} + +func TestSingleQuotedEmpty(t *testing.T) { + expectEqual(t, y(t, `a: ''`), map[string]any{"a": ""}) +} + +func TestQuotedNumberStaysString(t *testing.T) { + expectEqual(t, y(t, `a: "42"`), map[string]any{"a": "42"}) +} + +func TestQuotedBooleanStaysString(t *testing.T) { + expectEqual(t, y(t, `a: "true"`), map[string]any{"a": "true"}) +} + +// ===== BLOCK SCALARS ===== + +func TestLiteralBlock(t *testing.T) { + expectEqual(t, y(t, "a: |\n line1\n line2\n line3"), + map[string]any{"a": "line1\nline2\nline3\n"}) +} + +func TestLiteralBlockStrip(t *testing.T) { + expectEqual(t, y(t, "a: |-\n line1\n line2"), + map[string]any{"a": "line1\nline2"}) +} + +func TestLiteralBlockKeep(t *testing.T) { + expectEqual(t, y(t, "a: |+\n line1\n line2\n\n"), + map[string]any{"a": "line1\nline2\n\n"}) +} + +func TestFoldedBlock(t *testing.T) { + expectEqual(t, y(t, "a: >\n line1\n line2\n line3"), + map[string]any{"a": "line1 line2 line3\n"}) +} + +func TestFoldedBlockStrip(t *testing.T) { + expectEqual(t, y(t, "a: >-\n line1\n line2"), + map[string]any{"a": "line1 line2"}) +} + +func TestFoldedBlockKeep(t *testing.T) { + expectEqual(t, y(t, "a: >+\n line1\n line2\n\n"), + map[string]any{"a": "line1 line2\n\n"}) +} + +func TestLiteralBlockPreservesInnerIndent(t *testing.T) { + expectEqual(t, y(t, "a: |\n line1\n indented\n line3"), + map[string]any{"a": "line1\n indented\nline3\n"}) +} + +// ===== FLOW COLLECTIONS ===== + +func TestFlowSequence(t *testing.T) { + expectEqual(t, y(t, "a: [1, 2, 3]"), map[string]any{"a": []any{float64(1), 
float64(2), float64(3)}}) +} + +func TestFlowMapping(t *testing.T) { + expectEqual(t, y(t, "a: {x: 1, y: 2}"), map[string]any{"a": map[string]any{"x": float64(1), "y": float64(2)}}) +} + +func TestNestedFlowInBlock(t *testing.T) { + expectEqual(t, y(t, "a: [1, [2, 3]]"), + map[string]any{"a": []any{float64(1), []any{float64(2), float64(3)}}}) +} + +func TestEmptyFlowSequence(t *testing.T) { + expectEqual(t, y(t, "a: []"), map[string]any{"a": []any{}}) +} + +func TestEmptyFlowMapping(t *testing.T) { + expectEqual(t, y(t, "a: {}"), map[string]any{"a": map[string]any{}}) +} + +func TestFlowAtTopLevelSeq(t *testing.T) { + expectEqual(t, y(t, "[1, 2, 3]"), []any{float64(1), float64(2), float64(3)}) +} + +func TestFlowAtTopLevelMap(t *testing.T) { + expectEqual(t, y(t, "{a: 1, b: 2}"), map[string]any{"a": float64(1), "b": float64(2)}) +} + +// ===== COMMENTS ===== + +func TestLineComment(t *testing.T) { + expectEqual(t, y(t, "a: 1 # comment\nb: 2"), + map[string]any{"a": float64(1), "b": float64(2)}) +} + +func TestFullLineComment(t *testing.T) { + expectEqual(t, y(t, "# this is a comment\na: 1"), map[string]any{"a": float64(1)}) +} + +func TestCommentAfterKey(t *testing.T) { + expectEqual(t, y(t, "a: # comment\n b: 1"), + map[string]any{"a": map[string]any{"b": float64(1)}}) +} + +func TestMultipleComments(t *testing.T) { + expectEqual(t, y(t, "# first\na: 1\n# second\nb: 2"), + map[string]any{"a": float64(1), "b": float64(2)}) +} + +func TestCommentInList(t *testing.T) { + expectEqual(t, y(t, "- a # comment\n- b"), []any{"a", "b"}) +} + +// ===== ANCHORS AND ALIASES ===== + +func TestSimpleAnchorAlias(t *testing.T) { + expectEqual(t, y(t, "a: &ref hello\nb: *ref"), + map[string]any{"a": "hello", "b": "hello"}) +} + +func TestAnchorOnMap(t *testing.T) { + expectEqual(t, y(t, "defaults: &defaults\n x: 1\n y: 2\noverride:\n <<: *defaults\n y: 3"), + map[string]any{ + "defaults": map[string]any{"x": float64(1), "y": float64(2)}, + "override": map[string]any{"x": 
float64(1), "y": float64(3)}, + }) +} + +func TestAnchorOnSequence(t *testing.T) { + expectEqual(t, y(t, "a: &items\n - 1\n - 2\nb: *items"), + map[string]any{ + "a": []any{float64(1), float64(2)}, + "b": []any{float64(1), float64(2)}, + }) +} + +func TestMultipleAliases(t *testing.T) { + expectEqual(t, y(t, "a: &x 10\nb: &y 20\nc: *x\nd: *y"), + map[string]any{"a": float64(10), "b": float64(20), "c": float64(10), "d": float64(20)}) +} + +// ===== MERGE KEY ===== + +func TestSimpleMerge(t *testing.T) { + expectEqual(t, y(t, "defaults: &d\n a: 1\n b: 2\nresult:\n <<: *d\n c: 3"), + map[string]any{ + "defaults": map[string]any{"a": float64(1), "b": float64(2)}, + "result": map[string]any{"a": float64(1), "b": float64(2), "c": float64(3)}, + }) +} + +func TestMergeOverride(t *testing.T) { + expectEqual(t, y(t, "base: &b\n x: 1\n y: 2\nchild:\n <<: *b\n y: 99"), + map[string]any{ + "base": map[string]any{"x": float64(1), "y": float64(2)}, + "child": map[string]any{"x": float64(1), "y": float64(99)}, + }) +} + +// ===== MULTI-DOCUMENT ===== + +func TestDocumentStartMarker(t *testing.T) { + expectEqual(t, y(t, "---\na: 1"), map[string]any{"a": float64(1)}) +} + +func TestDocumentEndMarker(t *testing.T) { + expectEqual(t, y(t, "a: 1\n..."), map[string]any{"a": float64(1)}) +} + +// ===== TAGS ===== + +func TestExplicitStringTag(t *testing.T) { + expectEqual(t, y(t, "a: !!str 42"), map[string]any{"a": "42"}) +} + +func TestExplicitIntTag(t *testing.T) { + expectEqual(t, y(t, `a: !!int "42"`), map[string]any{"a": float64(42)}) +} + +func TestExplicitFloatTag(t *testing.T) { + expectEqual(t, y(t, `a: !!float "3.14"`), map[string]any{"a": float64(3.14)}) +} + +func TestExplicitBoolTag(t *testing.T) { + expectEqual(t, y(t, `a: !!bool "true"`), map[string]any{"a": true}) +} + +func TestExplicitNullTag(t *testing.T) { + expectEqual(t, y(t, `a: !!null ""`), map[string]any{"a": nil}) +} + +// ===== COMPLEX KEYS ===== + +func TestExplicitKey(t *testing.T) { + expectEqual(t, y(t, "? 
a\n: 1"), map[string]any{"a": float64(1)}) +} + +func TestNumericKey(t *testing.T) { + expectEqual(t, y(t, "1: one\n2: two"), map[string]any{"1": "one", "2": "two"}) +} + +// ===== DIRECTIVES ===== + +func TestYamlDirective(t *testing.T) { + // Should not error - directive stripped + result, err := Parse("%YAML 1.2\n---\na: 1") + if err != nil { + t.Fatalf("Parse error: %v", err) + } + expectEqual(t, result, map[string]any{"a": float64(1)}) +} + +func TestTagDirective(t *testing.T) { + result, err := Parse("%TAG ! tag:example.com,2000:\n---\na: 1") + if err != nil { + t.Fatalf("Parse error: %v", err) + } + expectEqual(t, result, map[string]any{"a": float64(1)}) +} + +// ===== INDENTATION ===== + +func TestTwoSpaceIndent(t *testing.T) { + expectEqual(t, y(t, "a:\n b: 1"), map[string]any{"a": map[string]any{"b": float64(1)}}) +} + +func TestFourSpaceIndent(t *testing.T) { + expectEqual(t, y(t, "a:\n b: 1"), map[string]any{"a": map[string]any{"b": float64(1)}}) +} + +func TestMixedIndentLevels(t *testing.T) { + expectEqual(t, y(t, "a:\n b:\n c: 1"), + map[string]any{"a": map[string]any{"b": map[string]any{"c": float64(1)}}}) +} + +func TestReturnToOuterIndent(t *testing.T) { + expectEqual(t, y(t, "a:\n b: 1\n c: 2\nd: 3"), + map[string]any{"a": map[string]any{"b": float64(1), "c": float64(2)}, "d": float64(3)}) +} + +func TestMultipleIndentReturns(t *testing.T) { + expectEqual(t, y(t, "a:\n b:\n c: 1\n d: 2\ne: 3"), + map[string]any{"a": map[string]any{"b": map[string]any{"c": float64(1)}, "d": float64(2)}, "e": float64(3)}) +} + +func TestListIndentUnderMap(t *testing.T) { + expectEqual(t, y(t, "a:\n - 1\n - 2\nb: 3"), + map[string]any{"a": []any{float64(1), float64(2)}, "b": float64(3)}) +} + +// ===== MULTILINE PLAIN SCALARS ===== + +func TestContinuationLine(t *testing.T) { + expectEqual(t, y(t, "a: this is\n a long string"), + map[string]any{"a": "this is a long string"}) +} + +func TestMultipleContinuationLines(t *testing.T) { + expectEqual(t, y(t, "a: line 
one\n line two\n line three"), + map[string]any{"a": "line one line two line three"}) +} + +// ===== WINDOWS LINE ENDINGS ===== + +func TestCRLF(t *testing.T) { + expectEqual(t, y(t, "a: 1\r\nb: 2"), + map[string]any{"a": float64(1), "b": float64(2)}) +} + +func TestCRLFNested(t *testing.T) { + expectEqual(t, y(t, "a:\r\n b: 1\r\n c: 2"), + map[string]any{"a": map[string]any{"b": float64(1), "c": float64(2)}}) +} + +func TestCRLFList(t *testing.T) { + expectEqual(t, y(t, "- a\r\n- b"), []any{"a", "b"}) +} + +// ===== SPECIAL CHARS IN VALUES ===== + +func TestValueWithHashNotComment(t *testing.T) { + expectEqual(t, y(t, "a: foo#bar"), map[string]any{"a": "foo#bar"}) +} + +func TestKeyWithSpaces(t *testing.T) { + expectEqual(t, y(t, "a long key: value"), map[string]any{"a long key": "value"}) +} + +// ===== SEQUENCE OF MAPPINGS ===== + +func TestCompactNotation(t *testing.T) { + expectEqual(t, y(t, "- name: alice\n age: 30\n- name: bob\n age: 25"), + []any{ + map[string]any{"name": "alice", "age": float64(30)}, + map[string]any{"name": "bob", "age": float64(25)}, + }) +} + +func TestSingleKeyPerElement(t *testing.T) { + expectEqual(t, y(t, "- a: 1\n- b: 2\n- c: 3"), + []any{map[string]any{"a": float64(1)}, map[string]any{"b": float64(2)}, map[string]any{"c": float64(3)}}) +} + +func TestNestedInMap(t *testing.T) { + expectEqual(t, y(t, "people:\n - name: alice\n - name: bob"), + map[string]any{"people": []any{map[string]any{"name": "alice"}, map[string]any{"name": "bob"}}}) +} + +// ===== REAL-WORLD YAML PATTERNS ===== + +func TestDockerComposeLike(t *testing.T) { + expectEqual(t, y(t, "version: 3\nservices:\n web:\n image: nginx\n ports:\n - 80\n - 443"), + map[string]any{ + "version": float64(3), + "services": map[string]any{ + "web": map[string]any{ + "image": "nginx", + "ports": []any{float64(80), float64(443)}, + }, + }, + }) +} + +func TestGithubActionsLike(t *testing.T) { + expectEqual(t, y(t, "name: build\non:\n push:\n branches:\n - main\njobs:\n test:\n 
runs-on: ubuntu"), + map[string]any{ + "name": "build", + "on": map[string]any{"push": map[string]any{"branches": []any{"main"}}}, + "jobs": map[string]any{"test": map[string]any{"runs-on": "ubuntu"}}, + }) +} + +func TestKubernetesLike(t *testing.T) { + expectEqual(t, y(t, "apiVersion: v1\nkind: Pod\nmetadata:\n name: myapp\n labels:\n app: myapp\nspec:\n containers:\n - name: web\n image: nginx"), + map[string]any{ + "apiVersion": "v1", + "kind": "Pod", + "metadata": map[string]any{"name": "myapp", "labels": map[string]any{"app": "myapp"}}, + "spec": map[string]any{"containers": []any{map[string]any{"name": "web", "image": "nginx"}}}, + }) +} + +func TestAnsibleLike(t *testing.T) { + expectEqual(t, y(t, "- name: install packages\n become: true\n- name: start service\n become: false"), + []any{ + map[string]any{"name": "install packages", "become": true}, + map[string]any{"name": "start service", "become": false}, + }) +} + +func TestConfigFileLike(t *testing.T) { + expectEqual(t, y(t, "database:\n host: localhost\n port: 5432\n name: mydb\ncache:\n enabled: true\n ttl: 3600"), + map[string]any{ + "database": map[string]any{"host": "localhost", "port": float64(5432), "name": "mydb"}, + "cache": map[string]any{"enabled": true, "ttl": float64(3600)}, + }) +} + +// ===== EMPTY INPUT ===== + +func TestEmptyInput(t *testing.T) { + result, err := Parse("") + if err != nil { + t.Fatalf("Parse error: %v", err) + } + if result != nil { + t.Errorf("expected nil, got %v", result) + } +} + +func TestWhitespaceOnly(t *testing.T) { + result, err := Parse(" \n \n ") + if err != nil { + t.Fatalf("Parse error: %v", err) + } + if result != nil { + t.Errorf("expected nil, got %v", result) + } +} + +// ===== HAPPY PATH ===== + +func TestHappy(t *testing.T) { + expectEqual(t, y(t, "a: 1\nb: 2\nc:\n d: 3\n e: 4\n f:\n - g\n - h\n"), + map[string]any{ + "a": float64(1), + "b": float64(2), + "c": map[string]any{ + "d": float64(3), + "e": float64(4), + "f": []any{"g", "h"}, + }, + }) +} 
diff --git a/src/yaml.ts b/src/yaml.ts index 8f25a06..0bf8bb6 100644 --- a/src/yaml.ts +++ b/src/yaml.ts @@ -34,6 +34,261 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { let pendingExplicitCL = false // Queue for tokens that need to be emitted across multiple lex calls. let pendingTokens: any[] = [] + // TAG directive handle mappings (e.g. %TAG !! tag:example.com/). + // When !! is redefined, built-in type conversion is skipped. + let tagHandles: Record = {} + + // Preprocess flow collections for YAML-specific features. + // Transforms flow collection content to be Jsonic-compatible: + // - Implicit null-valued keys in flow mappings: {a, b: c} → {a: ~, b: c} + // - Comments between key and colon: {"foo" # comment\n :bar} → {"foo" :bar} + // - Multiline quoted scalars in flow context: {"multi\n line"} → {"multi line"} + // - Explicit keys (?) inside flow collections + function preprocessFlowCollections(src: string): string { + let result = '' + let i = 0 + + while (i < src.length) { + if (src[i] === '{' || src[i] === '[') { + // Only treat as flow collection if it's at a value position: + // after start of string, after newline+indent, after ": ", after "- ", + // after ",", after "[" or "{", or preceded only by whitespace on its line. + if (isFlowCollectionStart(src, i)) { + let processed = processFlowCollection(src, i) + result += processed.text + i = processed.end + continue + } + } + result += src[i] + i++ + } + return result + } + + // Determine if { or [ at position i is a flow collection opener. + function isFlowCollectionStart(src: string, i: number): boolean { + if (i === 0) return true + // Look backward to find the preceding meaningful character. + let j = i - 1 + while (j >= 0 && (src[j] === ' ' || src[j] === '\t')) j-- + if (j < 0) return true + let prev = src[j] + // After newline: it's a flow collection if it's the first thing on the line. 
+ if (prev === '\n' || prev === '\r') return true + // After value/element/separator indicators. + if (prev === ':' || prev === '-' || prev === ',' || + prev === '[' || prev === '{') return true + return false + } + + function processFlowCollection(src: string, start: number): { text: string, end: number } { + let open = src[start] + let close = open === '{' ? '}' : ']' + let isMap = open === '{' + let out = open + let i = start + 1 + + // Track entries in flow mappings to detect implicit null-valued keys. + let entryHasColon = false + let entryParts: string[] = [] + + while (i < src.length) { + let ch = src[i] + + // Handle nested flow collections recursively. + if (ch === '{' || ch === '[') { + let nested = processFlowCollection(src, i) + if (isMap) { + entryParts.push(nested.text) + entryHasColon = true // nested structures count as values + } else { + out += nested.text + } + i = nested.end + continue + } + + // Handle quoted strings. + if (ch === '"') { + let str = '"' + i++ + while (i < src.length && src[i] !== '"') { + if (src[i] === '\\') { str += src[i]; i++ } + // Multiline double-quoted string: fold newlines into space. + if (src[i] === '\n' || src[i] === '\r') { + if (src[i] === '\r' && src[i + 1] === '\n') i++ + str += ' ' + i++ + while (i < src.length && (src[i] === ' ' || src[i] === '\t')) i++ + continue + } + str += src[i] + i++ + } + if (i < src.length) { str += '"'; i++ } + if (isMap) entryParts.push(str) + else out += str + continue + } + + if (ch === "'") { + let str = "'" + i++ + while (i < src.length) { + if (src[i] === "'" && src[i + 1] === "'") { str += "''"; i += 2; continue } + if (src[i] === "'") break + // Multiline single-quoted string: fold newlines into space. 
+ if (src[i] === '\n' || src[i] === '\r') { + if (src[i] === '\r' && src[i + 1] === '\n') i++ + str += ' ' + i++ + while (i < src.length && (src[i] === ' ' || src[i] === '\t')) i++ + continue + } + str += src[i] + i++ + } + if (i < src.length) { str += "'"; i++ } + if (isMap) entryParts.push(str) + else out += str + continue + } + + // Handle comments: strip them in flow context. + if (ch === '#') { + // Treat as comment if preceded by whitespace or at start of line. + if (i > 0 && (src[i - 1] === ' ' || src[i - 1] === '\t' || + src[i - 1] === '\n' || src[i - 1] === '\r')) { + while (i < src.length && src[i] !== '\n' && src[i] !== '\r') i++ + if (isMap) entryParts.push(' ') + else out += ' ' + continue + } + } + + // Handle newlines in flow context: fold into space. + if (ch === '\n' || ch === '\r') { + if (ch === '\r' && src[i + 1] === '\n') i++ + i++ + // Skip leading whitespace on continuation line. + while (i < src.length && (src[i] === ' ' || src[i] === '\t')) i++ + if (isMap) entryParts.push(' ') + else out += ' ' + continue + } + + // Handle colon (key-value separator in flow mapping). + if (isMap && ch === ':' && (src[i + 1] === ' ' || src[i + 1] === '\t' || + src[i + 1] === ',' || src[i + 1] === '}' || src[i + 1] === ']' || + src[i + 1] === '\n' || src[i + 1] === '\r' || src[i + 1] === undefined)) { + entryHasColon = true + entryParts.push(ch) + i++ + continue + } + + // Handle adjacent colon (no space after) as key-value separator in flow. + if (isMap && ch === ':' && i > start + 1) { + // Check if preceded by a quoted string close in the accumulated parts. + let accumulated = entryParts.join('').trimEnd() + if (accumulated.endsWith('"') || accumulated.endsWith("'")) { + entryHasColon = true + } + entryParts.push(ch) + i++ + continue + } + + // Handle comma: end of entry. 
+ if (ch === ',') { + if (isMap) { + let entry = entryParts.join('').trim() + if (!entryHasColon && entry.length > 0) { + out += entry + ': ~,' + } else { + out += entry + ',' + } + entryParts = [] + entryHasColon = false + } else { + out += ch + } + i++ + continue + } + + // Handle closing bracket. + if (ch === close) { + if (isMap) { + let entry = entryParts.join('').trim() + if (!entryHasColon && entry.length > 0) { + out += entry + ': ~' + } else { + out += entry + } + } + out += close + i++ + return { text: out, end: i } + } + + // Handle explicit key indicator in flow context. + // Only at the start of an entry (after open bracket/brace, comma, + // or after newline with only whitespace before it). + if (ch === '?' && (src[i + 1] === ' ' || src[i + 1] === '\t')) { + let isEntryStart = false + if (!isMap) { + // Check if ? is at entry start position in sequence. + let prevContent = out.trimEnd() + let lastChar = prevContent[prevContent.length - 1] + isEntryStart = lastChar === '[' || lastChar === ',' + } else { + let accumulated = entryParts.join('').trim() + isEntryStart = accumulated.length === 0 + } + if (isEntryStart && !isMap) { + // Convert [? key : val] → [{key: val}] + out += '{' + let inner = '' + i += 2 + while (i < src.length && src[i] !== ',' && src[i] !== close) { + if (src[i] === '\n' || src[i] === '\r') { + if (src[i] === '\r' && src[i + 1] === '\n') i++ + inner += ' ' + i++ + while (i < src.length && (src[i] === ' ' || src[i] === '\t')) i++ + continue + } + if (src[i] === '#') { + while (i < src.length && src[i] !== '\n' && src[i] !== '\r') i++ + continue + } + inner += src[i] + i++ + } + out += inner.trim() + '}' + continue + } else if (isEntryStart && isMap) { + // In flow mapping, ? is an explicit key indicator — skip it. + i += 2 + continue + } + } + + // Regular character. + if (isMap) entryParts.push(ch) + else out += ch + i++ + } + + // Unclosed collection — return what we have. 
+ if (isMap) { + out += entryParts.join('') + } + out += close + return { text: out, end: i } + } jsonic.options({ fixed: { @@ -130,43 +385,62 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { // indent of the containing block (e.g., the mapping key), which // may differ from the line's leading spaces (e.g., after "- "). if (explicitIndent > 0) { - // Find the column of the colon that precedes the block indicator. - // The key's indent level is the number of spaces before the key - // on this line, which accounts for "- " prefixes. + // Find the line containing the block indicator. let li = pnt.sI - 1 while (li > 0 && lex.src[li - 1] !== '\n' && lex.src[li - 1] !== '\r') li-- // li is now at the start of the line. Find the colon position. let keyCol = containingIndent + // Check if there's a colon on the SAME line as the block indicator. + let hasColonOnLine = false for (let ci = li + containingIndent; ci < pnt.sI; ci++) { if (lex.src[ci] === ':' && (lex.src[ci+1] === ' ' || lex.src[ci+1] === '\t')) { - // Key indent is the column of the first non-space after the - // sequence indicators. For "- aaa:", keyCol is 2 (after "- "). - // For " aaa:", keyCol is 2 (leading spaces). + hasColonOnLine = true break } } - // Check for sequence indicators: each "- " adds to the effective indent. - // But only when the block scalar is a value inside a mapping within - // the sequence (e.g., "- key: |2"), not when it's a direct value - // (e.g., "- |1"). Detect by checking for ": " between "- " and the - // block indicator. - let scanI = li + containingIndent - let hasColon = false - for (let ci = scanI; ci < pnt.sI; ci++) { - if (lex.src[ci] === ':' && (lex.src[ci+1] === ' ' || lex.src[ci+1] === '\t')) { - hasColon = true - break - } - } - if (hasColon) { + if (hasColonOnLine) { + // Block indicator on same line as colon (e.g., "key: |2"). + // Check for sequence indicators: each "- " adds to the effective indent. 
+ let scanI = li + containingIndent while (scanI < pnt.sI && lex.src[scanI] === '-' && (lex.src[scanI+1] === ' ' || lex.src[scanI+1] === '\t')) { keyCol += 2 scanI += 2 while (scanI < pnt.sI && lex.src[scanI] === ' ') { keyCol++; scanI++ } } + blockIndent = keyCol + explicitIndent + } else { + // Block indicator on its own line (e.g., after a tag on + // a separate line). Look backward to find the parent + // mapping key's indent by scanning previous lines for + // the colon that started this value context. + let parentIndent = 0 + let searchI = li - 1 + while (searchI > 0) { + // Find start of previous line. + if (lex.src[searchI] === '\n') searchI-- + if (lex.src[searchI] === '\r') searchI-- + let prevLineEnd = searchI + 1 + while (searchI > 0 && lex.src[searchI - 1] !== '\n' && lex.src[searchI - 1] !== '\r') searchI-- + let prevLineStart = searchI + // Check if this line has a colon (mapping key). + for (let ci = prevLineStart; ci < prevLineEnd; ci++) { + if (lex.src[ci] === ':' && (lex.src[ci+1] === ' ' || lex.src[ci+1] === '\t' || + lex.src[ci+1] === '\n' || lex.src[ci+1] === '\r' || ci+1 >= prevLineEnd)) { + // Found the parent key line. Get its indent. + parentIndent = 0 + let pi = prevLineStart + while (pi < prevLineEnd && lex.src[pi] === ' ') { parentIndent++; pi++ } + break + } + } + break // Only check the immediately preceding non-blank line. + } + blockIndent = parentIndent + explicitIndent + // Update containingIndent to parent's indent so the + // "blockIndent <= containingIndent" check below works. + containingIndent = parentIndent } - blockIndent = keyCol + explicitIndent } if (blockIndent <= containingIndent && !isDocStart && idx < fwd.length) { // Content is not indented enough — empty block scalar. @@ -465,7 +739,9 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { } // The minimum indent for continuation lines. - let minContinuationIndent = isMapValue ? 
keyIndent + 1 : currentLineIndent + // For map values, continuation indent is based on the colon's line indent, + // not the previous line's indent (which may be a key continuation line). + let minContinuationIndent = isMapValue ? currentLineIndent + 1 : currentLineIndent let text = '' let i = 0 let totalConsumed = 0 @@ -523,10 +799,30 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { (fwd[i] === '.' && fwd[i+1] === '.' && fwd[i+2] === '.' && (fwd[i+3] === ' ' || fwd[i+3] === '\t' || fwd[i+3] === '\n' || fwd[i+3] === '\r' || fwd[i+3] === undefined))) - // Check for sequence marker "- ". - let isSeqMarker = fwd[i] === '-' && - (fwd[i+1] === ' ' || fwd[i+1] === '\t' || fwd[i+1] === '\n' || - fwd[i+1] === '\r' || fwd[i+1] === undefined) + // Check for sequence marker "- ". Only treat as a new sequence + // entry when the indent matches an enclosing sequence's level. + // Find the nearest "- " sequence marker preceding the text on + // the first line to determine the relevant sequence indent. + let isSeqMarker = false + if (fwd[i] === '-' && + (fwd[i+1] === ' ' || fwd[i+1] === '\t' || fwd[i+1] === '\n' || + fwd[i+1] === '\r' || fwd[i+1] === undefined)) { + // Determine the sequence indent from the first line's context. + // Look backward from pnt.sI to find "- " markers before the text. + let seqIndent = -1 + let si = pnt.sI - 1 + while (si >= lineStart) { + if (lex.src[si] === '-' && (lex.src[si+1] === ' ' || lex.src[si+1] === '\t')) { + seqIndent = si - lineStart + break + } + si-- + } + // isSeqMarker if the continuation "- " matches a known sequence + // indent, or if it's at the current line indent level. + isSeqMarker = (seqIndent >= 0 && lineIndent === seqIndent) || + (seqIndent < 0 && lineIndent <= currentLineIndent) + } let canContinue = inFlowCtx ? 
(i < fwd.length && fwd[i] !== '\n' && fwd[i] !== '\r' && fwd[i] !== '#' && fwd[i] !== '{' && fwd[i] !== '}' && @@ -650,6 +946,15 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { if (src[0] === '%') { let dIdx = src.indexOf('\n---') if (dIdx >= 0) { + // Parse %TAG directives before stripping. + let dirBlock = src.substring(0, dIdx) + let dirLines = dirBlock.split('\n') + for (let dl of dirLines) { + let tagMatch = dl.match(/^%TAG\s+(\S+)\s+(\S+)/) + if (tagMatch) { + tagHandles[tagMatch[1]] = tagMatch[2] + } + } hadDirective = true src = src.substring(dIdx + 1) } @@ -734,6 +1039,14 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { if (docStripped && /^(---|\.\.\.)(\s|$)/.test(src)) { src = '' } + // Preprocess flow collections for YAML-specific features + // that Jsonic's core parser doesn't handle natively: + // - Implicit null-valued keys in flow mappings: {a, b: c} + // - Comments between key and colon: {"foo" # comment\n :bar} + // - Multiline plain/quoted scalars in flow context + // - Explicit keys (?) inside flow collections + src = preprocessFlowCollections(src) + lex.src = src lex.pnt.len = src.length // If source is empty/whitespace/comments-only after preprocessing, @@ -792,7 +1105,13 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { while (nameEnd < fwd.length && fwd[nameEnd] !== ' ' && fwd[nameEnd] !== '\t' && fwd[nameEnd] !== '\n' && fwd[nameEnd] !== '\r' && fwd[nameEnd] !== ',' && fwd[nameEnd] !== '{' && fwd[nameEnd] !== '}' && fwd[nameEnd] !== '[' && - fwd[nameEnd] !== ']' && fwd[nameEnd] !== ':') nameEnd++ + fwd[nameEnd] !== ']') { + // Colon terminates only when followed by space/tab (key-value separator). + // Otherwise colon is a valid anchor-name character per YAML spec. 
+ if (fwd[nameEnd] === ':' && + (fwd[nameEnd+1] === ' ' || fwd[nameEnd+1] === '\t')) break + nameEnd++ + } let name = fwd.substring(1, nameEnd) let src = fwd.substring(0, nameEnd) // Check if this alias is used as a map key (followed by ` :` or `:`). @@ -810,9 +1129,22 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { pnt.cI += nameEnd return tkn } - // Store the alias name as a special marker object. - let marker = { __yamlAlias: name } - let tkn = lex.token('#VL', marker, src, lex.pnt) + // Resolve alias immediately if anchor exists, since deferred + // markers can be lost through Jsonic's rule processing. + let tkn: any + if (anchors[name] !== undefined) { + let val = anchors[name] + if (typeof val === 'object' && val !== null) { + val = JSON.parse(JSON.stringify(val)) + } + let tin = typeof val === 'string' ? '#TX' : + typeof val === 'number' ? '#NR' : '#VL' + tkn = lex.token(tin, val, src, lex.pnt) + } else { + // Anchor not yet seen — store marker for deferred resolution. + let marker = { __yamlAlias: name } + tkn = lex.token('#VL', marker, src, lex.pnt) + } pnt.sI += nameEnd pnt.cI += nameEnd return tkn @@ -965,6 +1297,33 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { if (fwd[tagEnd] === ' ') tagEnd++ // skip space after tag pnt.sI += tagEnd pnt.cI += tagEnd + // If tag is standalone (followed by newline), consume the + // newline and leading spaces so no extra #IN is emitted. + if (pnt.sI < lex.src.length && + (lex.src[pnt.sI] === '\n' || lex.src[pnt.sI] === '\r')) { + // Check if tag is standalone on its line. 
+ let tagStandalone = true + let tagLineIndent = 0 + let bi = pnt.sI - tagEnd - 1 + while (bi >= 0 && lex.src[bi] !== '\n' && lex.src[bi] !== '\r') { + if (lex.src[bi] !== ' ' && lex.src[bi] !== '\t') { + tagStandalone = false + break + } + tagLineIndent++ + bi-- + } + if (tagStandalone) { + let nl = pnt.sI + if (lex.src[nl] === '\r') nl++ + if (lex.src[nl] === '\n') nl++ + let spaces = 0 + while (nl + spaces < lex.src.length && lex.src[nl + spaces] === ' ') spaces++ + pnt.sI = nl + spaces + pnt.cI = spaces + pnt.rI++ + } + } fwd = lex.src.substring(pnt.sI) // Restart matching to parse the value. continue yamlMatchLoop @@ -1053,10 +1412,12 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { if (fwd[valEnd] === q) valEnd++ let rawVal = fwd.substring(valStart + 1, valEnd - 1) let result: any = rawVal - if (tag === 'int') result = parseInt(rawVal, 10) - else if (tag === 'float') result = parseFloat(rawVal) - else if (tag === 'bool') result = rawVal === 'true' || rawVal === 'True' || rawVal === 'TRUE' - else if (tag === 'null') result = null + if (!tagHandles['!!']) { + if (tag === 'int') result = parseInt(rawVal, 10) + else if (tag === 'float') result = parseFloat(rawVal) + else if (tag === 'bool') result = rawVal === 'true' || rawVal === 'True' || rawVal === 'TRUE' + else if (tag === 'null') result = null + } if (tagAnchorName) anchors[tagAnchorName] = result let tknTin = typeof result === 'string' ? '#TX' : typeof result === 'number' ? 
'#NR' : '#VL' @@ -1092,11 +1453,16 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { } let rawVal = fwd.substring(valStart, valEnd).replace(/\s+$/, '') let result: any = rawVal - if (tag === 'str') result = String(rawVal) - else if (tag === 'int') result = parseInt(rawVal, 10) - else if (tag === 'float') result = parseFloat(rawVal) - else if (tag === 'bool') result = rawVal === 'true' || rawVal === 'True' || rawVal === 'TRUE' - else if (tag === 'null') result = null + // Only apply built-in type conversion when !! has not been + // redefined by a %TAG directive. Custom tag handles mean + // !!type is a user-defined tag, not a YAML core type. + if (!tagHandles['!!']) { + if (tag === 'str') result = String(rawVal) + else if (tag === 'int') result = parseInt(rawVal, 10) + else if (tag === 'float') result = parseFloat(rawVal) + else if (tag === 'bool') result = rawVal === 'true' || rawVal === 'True' || rawVal === 'TRUE' + else if (tag === 'null') result = null + } if (tagAnchorName) anchors[tagAnchorName] = result // Use #ST for empty strings (jsonic handles #ST better than // empty #TX in flow context), #NR for numbers, #VL for null. @@ -1149,7 +1515,61 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { while (li > 0 && lex.src[li-1] !== '\n' && lex.src[li-1] !== '\r') li-- while (li < pnt.sI && lex.src[li] === ' ') { qIndent++; li++ } } - // Scan continuation lines for key. + // Count extra rows consumed (for multiline keys). + let extraRows = 0 + + // Handle block scalar keys (| or >). + let blockScalarMatch = key.match(/^([|>])([+-]?)([0-9]?)$/) + if (blockScalarMatch) { + let isFolded = blockScalarMatch[1] === '>' + let chomp = blockScalarMatch[2] || '' + let explicitIndent = blockScalarMatch[3] ? parseInt(blockScalarMatch[3]) : 0 + // Collect block scalar content lines. 
+ let blockLines: string[] = [] + let contentIndent = 0 + while (consumed < fwd.length) { + let lineIndent = 0 + while (consumed + lineIndent < fwd.length && fwd[consumed + lineIndent] === ' ') lineIndent++ + let afterSpaces = consumed + lineIndent + // Empty line or line with only spaces. + if (afterSpaces >= fwd.length || fwd[afterSpaces] === '\n' || fwd[afterSpaces] === '\r') { + blockLines.push('') + consumed = afterSpaces + if (consumed < fwd.length && fwd[consumed] === '\r') consumed++ + if (consumed < fwd.length && fwd[consumed] === '\n') consumed++ + extraRows++ + continue + } + // Determine content indent from first non-empty line. + if (contentIndent === 0) { + contentIndent = explicitIndent > 0 ? qIndent + explicitIndent : lineIndent + } + // Line must be indented more than ? to be content. + if (lineIndent < contentIndent) break + // Collect line content. + let lineEnd = afterSpaces + while (lineEnd < fwd.length && fwd[lineEnd] !== '\n' && fwd[lineEnd] !== '\r') lineEnd++ + blockLines.push(fwd.substring(consumed + contentIndent, lineEnd)) + consumed = lineEnd + if (consumed < fwd.length && fwd[consumed] === '\r') consumed++ + if (consumed < fwd.length && fwd[consumed] === '\n') consumed++ + extraRows++ + } + // Apply chomping. + // Remove trailing empty lines for non-keep. + if (chomp !== '+') { + while (blockLines.length > 0 && blockLines[blockLines.length - 1] === '') blockLines.pop() + } + if (isFolded) { + key = blockLines.join(' ') + '\n' + } else { + key = blockLines.join('\n') + '\n' + } + if (chomp === '-') { + key = key.replace(/\n$/, '') + } + } else { + // Scan continuation lines for key (plain scalar multiline). while (consumed < fwd.length) { // Skip comment lines. 
let lineIndent = 0 @@ -1161,6 +1581,7 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { beforeNewline = afterSpaces if (afterSpaces < fwd.length && fwd[afterSpaces] === '\r') afterSpaces++ if (afterSpaces < fwd.length && fwd[afterSpaces] === '\n') afterSpaces++ + extraRows++ consumed = afterSpaces continue } @@ -1181,10 +1602,12 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { beforeNewline = consumed if (consumed < fwd.length && fwd[consumed] === '\r') consumed++ if (consumed < fwd.length && fwd[consumed] === '\n') consumed++ + extraRows++ continue } break } + } // Now check if the next non-comment line starts with `:`. let hasValue = false let valConsumed = consumed @@ -1204,8 +1627,9 @@ const Yaml: Plugin = (jsonic: Jsonic, _options: YamlOptions) => { let src = fwd.substring(0, hasValue ? consumed : keyEnd) if (hasValue) { pnt.sI += valConsumed - pnt.rI++ - pnt.cI = 1 + pnt.rI += 1 + extraRows + // Set column to actual position after `: ` on the value line (1-indexed). + pnt.cI = valConsumed - consumed + 1 // Has `: value` — emit KEY now, CL on next call. pendingExplicitCL = true } else { diff --git a/test/yaml-test-suite.test.ts b/test/yaml-test-suite.test.ts index 0216183..1ebc627 100644 --- a/test/yaml-test-suite.test.ts +++ b/test/yaml-test-suite.test.ts @@ -27,18 +27,6 @@ const SUITE_DIR = join(__dirname, '..', 'test', 'yaml-test-suite') // or edge cases where Jsonic's base grammar conflicts with YAML semantics. // As parser coverage improves, entries should be removed and tests should pass. 
const SKIP: Record = { - '2SXE': 'output mismatch', - '5WE3': 'parse error', - '8KB6': 'parse error', - '9BXH': 'parse error', - 'A2M4': 'output mismatch', - 'AB8U': 'output mismatch', - 'CT4Q': 'parse error', - 'JTV5': 'parse error', - 'K3WX': 'parse error', - 'M5C3': 'parse error', - 'P76L': 'output mismatch', - 'W5VH': 'output mismatch', } @@ -227,7 +215,7 @@ describe('yaml-test-suite', () => { const skipReason = SKIP[tc.id] test(`${tc.id}: ${tc.name}`, { skip: skipReason || undefined }, () => { - const inYaml = readFileSync(join(tc.dir, 'in.yaml'), 'utf8') + const inYaml = readFileSync(join(tc.dir, 'in.yaml'), 'utf8').replace(/\r\n/g, '\n') const inJsonRaw = readFileSync(join(tc.dir, 'in.json'), 'utf8') const { value: expected, multiDoc } = parseExpectedJson(inJsonRaw) @@ -259,7 +247,7 @@ describe('yaml-test-suite', () => { const skipReason = SKIP[tc.id] test(`${tc.id}: ${tc.name}`, { skip: skipReason || undefined }, () => { - const inYaml = readFileSync(join(tc.dir, 'in.yaml'), 'utf8') + const inYaml = readFileSync(join(tc.dir, 'in.yaml'), 'utf8').replace(/\r\n/g, '\n') const j = Jsonic.make().use(Yaml) let threw = false