From 9c46d72196c420afd8b979b75077260460b043a7 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 15:22:44 +0000 Subject: [PATCH 1/4] Rewrite Go CSV to use jsonic grammar rules matching TypeScript implementation The previous implementation bypassed jsonic's grammar system entirely, using a standalone parser. This rewrite mirrors the TypeScript CSV plugin architecture with proper jsonic grammar rules (csv, newline, record, list, elem, val, text) and a custom CSV string matcher for RFC 4180 double-quote escaping. Key changes: - csv.go: Options and types only (removed standalone parser) - plugin.go: Full grammar-based implementation using jsonic Rule/AltSpec - Custom #RL/#RS token types to work around jsonic's global IGNORE set - Single-newline lexing for proper empty record handling - Comment starter cleanup when comments are disabled https://claude.ai/code/session_01Ehie1ims4jJtda9A6MmD1x --- go/csv.go | 406 ++---------------------- go/plugin.go | 867 +++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 834 insertions(+), 439 deletions(-) diff --git a/go/csv.go b/go/csv.go index 12440ea..c142573 100644 --- a/go/csv.go +++ b/go/csv.go @@ -1,23 +1,18 @@ package csv -import ( - "strconv" - "strings" - "unicode" -) - // CsvOptions configures the CSV parser. type CsvOptions struct { - Object *bool // Return objects (default true) or arrays - Header *bool // First row is header (default true) - Trim *bool // Trim whitespace from values - Comment *bool // Enable # comments - Number *bool // Parse numeric values - Value *bool // Parse true/false/null - Strict *bool // Strict CSV mode (default true) - Field *FieldOptions // Field options - Record *RecordOptions // Record options - Stream StreamFunc // Streaming callback + Object *bool // Return objects (default true) or arrays + Header *bool // First row is header (default true) + Trim *bool // Trim whitespace from values + Comment *bool // Enable # comments + Number *bool // Parse numeric values + Value *bool // Parse true/false/null + Strict *bool // Strict CSV mode (default true) + Field *FieldOptions // Field options + Record *RecordOptions // Record options + String *StringOptions // String options + Stream StreamFunc // Streaming callback } // FieldOptions configures field handling. @@ -35,6 +30,12 @@ type RecordOptions struct { Empty bool // Preserve empty records (default false) } +// StringOptions configures string handling. +type StringOptions struct { + Quote string // Quote character (default `"`) + Csv *bool // Force CSV string mode +} + // StreamFunc is a callback for streaming CSV parsing. type StreamFunc func(what string, record any) @@ -54,7 +55,8 @@ type resolved struct { fieldExact bool recordSep string recordEmpty bool - quote byte + quote string + csvString *bool stream StreamFunc } @@ -73,10 +75,10 @@ func resolve(o *CsvOptions) *resolved { strict: strict, noNamePrefix: "field~", emptyField: "", - quote: '"', + quote: `"`, } - // In non-strict mode, trim/comment/number default to true + // In non-strict mode, trim/comment/number/value default to true if strict { r.trim = boolOpt(o.Trim, false) r.comment = boolOpt(o.Comment, false) @@ -107,367 +109,13 @@ func resolve(o *CsvOptions) *resolved { r.recordEmpty = o.Record.Empty } - r.stream = o.Stream - return r -} - -// Parse parses CSV text with the given options. -func Parse(src string, opts ...CsvOptions) ([]any, error) { - var o CsvOptions - if len(opts) > 0 { - o = opts[0] - } - r := resolve(&o) - return parseCSV(src, r) -} - -// parser holds parsing state. -type parser struct { - src string - pos int - opts *resolved -} - -func parseCSV(src string, opts *resolved) ([]any, error) { - if opts.stream != nil { - opts.stream("start", nil) - } - - p := &parser{src: src, pos: 0, opts: opts} - var result []any - - // Parse all raw records - var headers []string - recordIndex := 0 - - for p.pos <= len(p.src) { - fields, isEOF := p.parseRecord() - - // Check if this is an empty record - isEmpty := len(fields) == 0 || (len(fields) == 1 && fields[0] == "") - - if isEmpty && !isEOF { - if recordIndex == 0 { - // Skip leading empty lines (before header) - continue - } - if !opts.recordEmpty { - continue - } - // With empty records enabled, create a record with empty fields - if opts.header && headers != nil { - fields = make([]string, 1) - fields[0] = "" - } - } - - if isEmpty && isEOF { - break - } - - if recordIndex == 0 && opts.header { - // First non-empty record is the header - headers = fields - recordIndex++ - continue - } - - // Build the record - record := buildRecord(fields, headers, opts, recordIndex) - if opts.stream != nil { - opts.stream("record", record) - } else { - result = append(result, record) + if o.String != nil { + if o.String.Quote != "" { + r.quote = o.String.Quote } - recordIndex++ - - if isEOF { - break - } - } - - if result == nil { - result = []any{} + r.csvString = o.String.Csv } - if opts.stream != nil { - opts.stream("end", nil) - } - - return result, nil -} - -// parseRecord parses one record from the current position. -// Returns the fields and whether EOF was reached. -func (p *parser) parseRecord() ([]string, bool) { - if p.pos >= len(p.src) { - return nil, true - } - - var fields []string - isEOF := false - - for { - field, term := p.parseField() - fields = append(fields, field) - - switch term { - case termFieldSep: - // Continue to next field - continue - case termRecordSep: - return fields, false - case termEOF: - isEOF = true - return fields, isEOF - } - } -} - -type terminator int - -const ( - termFieldSep terminator = iota - termRecordSep - termEOF -) - -// parseField parses one field value from the current position. -func (p *parser) parseField() (string, terminator) { - if p.pos >= len(p.src) { - return "", termEOF - } - - // Check if we're at a record separator - if t := p.atRecordSep(); t > 0 { - p.pos += t - return "", termRecordSep - } - - // Check for quoted field - if p.src[p.pos] == p.opts.quote { - return p.parseQuotedField() - } - - return p.parseUnquotedField() -} - -// parseQuotedField parses a quoted field (RFC 4180 style). -func (p *parser) parseQuotedField() (string, terminator) { - quote := p.opts.quote - p.pos++ // skip opening quote - var sb strings.Builder - - for p.pos < len(p.src) { - ch := p.src[p.pos] - if ch == quote { - p.pos++ - // Check for escaped quote (double quote) - if p.pos < len(p.src) && p.src[p.pos] == quote { - sb.WriteByte(quote) - p.pos++ - continue - } - // End of quoted field - skip to next separator - return p.skipToSeparator(sb.String()) - } - sb.WriteByte(ch) - p.pos++ - } - - // Unterminated quote - return what we have - return sb.String(), termEOF -} - -// skipToSeparator skips any content after closing quote until the next separator. -func (p *parser) skipToSeparator(val string) (string, terminator) { - for p.pos < len(p.src) { - // Check for field separator - if strings.HasPrefix(p.src[p.pos:], p.opts.fieldSep) { - p.pos += len(p.opts.fieldSep) - return val, termFieldSep - } - // Check for record separator - if t := p.atRecordSep(); t > 0 { - p.pos += t - return val, termRecordSep - } - // Skip any other character (non-standard content after closing quote) - p.pos++ - } - return val, termEOF -} - -// parseUnquotedField parses an unquoted field value. -func (p *parser) parseUnquotedField() (string, terminator) { - start := p.pos - - for p.pos < len(p.src) { - // Check for comment - if p.opts.comment && p.src[p.pos] == '#' { - val := p.src[start:p.pos] - // Skip to end of line (or record separator) - p.skipToRecordEnd() - return val, termRecordSep - } - - // Check for field separator - if strings.HasPrefix(p.src[p.pos:], p.opts.fieldSep) { - val := p.src[start:p.pos] - p.pos += len(p.opts.fieldSep) - return val, termFieldSep - } - - // Check for record separator - if t := p.atRecordSep(); t > 0 { - val := p.src[start:p.pos] - p.pos += t - return val, termRecordSep - } - - p.pos++ - } - - val := p.src[start:p.pos] - return val, termEOF -} - -// skipToRecordEnd skips to the end of the current record (for comments). -func (p *parser) skipToRecordEnd() { - for p.pos < len(p.src) { - if t := p.atRecordSep(); t > 0 { - p.pos += t - return - } - p.pos++ - } -} - -// atRecordSep checks if the current position is at a record separator. -// Returns the number of bytes to skip, or 0 if not at a separator. -func (p *parser) atRecordSep() int { - if p.pos >= len(p.src) { - return 0 - } - - if p.opts.recordSep != "" { - // Custom record separator - if strings.HasPrefix(p.src[p.pos:], p.opts.recordSep) { - return len(p.opts.recordSep) - } - return 0 - } - - // Default: \r\n or \n or \r - if p.src[p.pos] == '\r' { - if p.pos+1 < len(p.src) && p.src[p.pos+1] == '\n' { - return 2 - } - return 1 - } - if p.src[p.pos] == '\n' { - return 1 - } - return 0 -} - -// buildRecord converts raw field strings into the output format. -func buildRecord(fields []string, headers []string, opts *resolved, recordIndex int) any { - // Apply transformations to field values - processed := make([]any, len(fields)) - for i, f := range fields { - processed[i] = transformValue(f, opts) - } - - if !opts.object { - return processed - } - - // Build object - obj := make(map[string]any) - // Use ordered keys to maintain insertion order - var keys []string - - nameSource := headers - if !opts.header && opts.fieldNames != nil { - nameSource = opts.fieldNames - } - - if nameSource != nil { - for i := 0; i < len(nameSource) && i < len(processed); i++ { - key := nameSource[i] - obj[key] = processed[i] - keys = append(keys, key) - } - // Extra fields beyond named ones - for i := len(nameSource); i < len(processed); i++ { - key := opts.noNamePrefix + strconv.Itoa(i) - obj[key] = processed[i] - keys = append(keys, key) - } - } else { - // No names - use prefix - for i := 0; i < len(processed); i++ { - key := opts.noNamePrefix + strconv.Itoa(i) - obj[key] = processed[i] - keys = append(keys, key) - } - } - - // Fill missing fields with empty value - if nameSource != nil { - for i := len(processed); i < len(nameSource); i++ { - obj[nameSource[i]] = opts.emptyField - } - } - - return orderedMap{keys: keys, m: obj} -} - -// orderedMap maintains insertion order for JSON serialization comparison. -type orderedMap struct { - keys []string - m map[string]any -} - -// transformValue applies trim, number, and value conversions. -func transformValue(s string, opts *resolved) any { - if opts.trim { - s = strings.TrimFunc(s, unicode.IsSpace) - } - - if opts.value { - switch s { - case "true": - return true - case "false": - return false - case "null": - return nil - } - } - - if opts.number { - if n, ok := parseNumber(s); ok { - return n - } - } - - return s -} - -// parseNumber tries to parse a string as a number. -func parseNumber(s string) (float64, bool) { - if s == "" { - return 0, false - } - f, err := strconv.ParseFloat(s, 64) - if err != nil { - return 0, false - } - // Return integer if it's a whole number - if f == float64(int64(f)) && !strings.Contains(s, ".") { - return f, true - } - return f, true + r.stream = o.Stream + return r } diff --git a/go/plugin.go b/go/plugin.go index b78becf..c0de518 100644 --- a/go/plugin.go +++ b/go/plugin.go @@ -1,103 +1,836 @@ package csv import ( + "strconv" + "strings" + jsonic "github.com/jsonicjs/jsonic/go" ) // Csv is a jsonic plugin that adds CSV parsing support. -// It adds a high-priority custom matcher that consumes the entire source -// and produces the CSV-parsed result as a single value token. -// -// Usage: -// -// j := jsonic.Make() -// j.Use(Csv, map[string]any{"header": true}) -// result, err := j.Parse("a,b\n1,2") +// It mirrors the TypeScript Csv plugin, defining grammar rules +// (csv, newline, record, list, elem, val, text) and a custom +// CSV string matcher. func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { csvOpts := mapToOptions(pluginOpts) + opts := resolve(&csvOpts) + + strict := opts.strict + objres := opts.object + header := opts.header + trim := opts.trim + comment := opts.comment + optNumber := opts.number + optValue := opts.value + recordEmpty := opts.recordEmpty + stream := opts.stream + + // In strict mode, disable JSON structure tokens and Jsonic field content parsing. + if strict { + useCsvString := true + if opts.csvString != nil && !*opts.csvString { + useCsvString = false + } + if useCsvString { + j.AddMatcher("stringcsv", 100000, buildCsvStringMatcher(opts, j)) + } + // Disable JSON structure tokens in strict mode. + cfg := j.Config() + delete(cfg.FixedTokens, "{") + delete(cfg.FixedTokens, "}") + delete(cfg.FixedTokens, "[") + delete(cfg.FixedTokens, "]") + delete(cfg.FixedTokens, ":") + cfg.SortFixedTokens() + + // Exclude jsonic and imp rule groups. + j.Exclude("jsonic", "imp") + } else { + useCsvString := false + if opts.csvString != nil && *opts.csvString { + useCsvString = true + } + if useCsvString { + j.AddMatcher("stringcsv", 100000, buildCsvStringMatcher(opts, j)) + } + if csvOpts.Trim == nil { + trim = true + } + if csvOpts.Comment == nil { + comment = true + } + if csvOpts.Number == nil { + optNumber = true + } + j.Exclude("imp") + } - // Add a high-priority matcher that consumes the entire source - // and produces a single value token containing the parsed CSV result. - j.AddMatcher("csv", 1000, func(lex *jsonic.Lex) *jsonic.Token { + // Custom "comma" (field separator) + if opts.fieldSep != "," { + cfg := j.Config() + // Remove old comma mapping + delete(cfg.FixedTokens, ",") + // Add custom separator + j.Token("#CA", opts.fieldSep) + cfg.SortFixedTokens() + } + + cfg := j.Config() + + // Configure number/value/comment lexing + cfg.NumberLex = optNumber + cfg.ValueLex = optValue + cfg.CommentLex = comment + + // When comments are disabled, clear comment line starters so the text matcher + // doesn't stop at '#' or '//'. Otherwise '#' becomes unmatchable. + if !comment { + cfg.CommentLine = nil + cfg.CommentBlock = nil + } + + // Set start rule + cfg.RuleStart = "csv" + + if opts.recordSep != "" { + cfg.LineChars = make(map[rune]bool) + cfg.RowChars = make(map[rune]bool) + for _, ch := range opts.recordSep { + cfg.LineChars[ch] = true + cfg.RowChars[ch] = true + } + } + + // Register custom token types that are NOT in jsonic's global IGNORE set. + // In the TS version, the IGNORE set is configurable per-instance. + // In Go, TinSetIGNORE is a global map, so we use custom tokens instead. + RL := j.Token("#RL") // Record Line (non-ignored LN equivalent) + RS := j.Token("#RS") // Record Space (non-ignored SP equivalent) + + // Intercept the line matcher: emit #RL instead of #LN so it's not ignored. + // Each line ending (\n or \r\n) is emitted as a separate token so the grammar + // can distinguish multiple newlines (important for empty record handling). + cfg.LineCheck = func(lex *jsonic.Lex) *jsonic.LexCheckResult { pnt := lex.Cursor() - if pnt.SI != 0 { - return nil // Only match at start of source + src := lex.Src + sI := pnt.SI + rI := pnt.RI + if sI >= pnt.Len { + return nil + } + if !cfg.LineChars[rune(src[sI])] { + return nil + } + startI := sI + // Consume one line ending: \r\n or \r or \n + if src[sI] == '\r' { + sI++ + if sI < pnt.Len && src[sI] == '\n' { + sI++ + } + rI++ + } else if cfg.LineChars[rune(src[sI])] { + if cfg.RowChars[rune(src[sI])] { + rI++ + } + sI++ } + tkn := lex.Token("#RL", RL, nil, src[startI:sI]) + pnt.SI = sI + pnt.RI = rI + pnt.CI = 1 + return &jsonic.LexCheckResult{Done: true, Token: tkn} + } + // In strict mode, also intercept space to emit #RS. + // In non-strict mode, spaces are handled by the grammar too. + cfg.SpaceCheck = func(lex *jsonic.Lex) *jsonic.LexCheckResult { + pnt := lex.Cursor() src := lex.Src - result, err := Parse(src, csvOpts) - if err != nil { + sI := pnt.SI + cI := pnt.CI + if sI >= pnt.Len { + return nil + } + if !cfg.SpaceChars[rune(src[sI])] { return nil } + startI := sI + for sI < pnt.Len && cfg.SpaceChars[rune(src[sI])] { + sI++ + cI++ + } + tkn := lex.Token("#RS", RS, nil, src[startI:sI]) + pnt.SI = sI + pnt.CI = cI + return &jsonic.LexCheckResult{Done: true, Token: tkn} + } + + // Get token Tins - use our custom non-ignored tokens + LN := RL // Use RL (non-ignored) instead of LN (ignored) + CA := j.Token("#CA") + SP := RS // Use RS (non-ignored) instead of SP (ignored) + ZZ := j.Token("#ZZ") + VAL := j.TokenSet("VAL") // [TX, NR, ST, VL] + + // ======= csv rule (starting rule) ======= + j.Rule("csv", func(rs *jsonic.RuleSpec) { + rs.Clear() + + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + if ctx.Meta == nil { + ctx.Meta = make(map[string]any) + } + ctx.Meta["recordI"] = 0 + if stream != nil { + stream("start", nil) + } + r.Node = make([]any, 0) + }) + + openAlts := []*jsonic.AltSpec{ + // End immediately if EOF + {S: [][]jsonic.Tin{{ZZ}}}, + } + // Ignore empty lines from the start (if not preserving empty records) + if !recordEmpty { + openAlts = append(openAlts, &jsonic.AltSpec{S: [][]jsonic.Tin{{LN}}, P: "newline"}) + } + // Look for the first record + openAlts = append(openAlts, &jsonic.AltSpec{P: "record"}) + rs.Open = openAlts + + rs.AddAC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if stream != nil { + stream("end", nil) + } + }) + }) + + // ======= newline rule ======= + j.Rule("newline", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{LN}, {LN}}, R: "newline"}, + {S: [][]jsonic.Tin{{LN}}, R: "newline"}, + {S: [][]jsonic.Tin{{ZZ}}}, + {R: "record"}, + } + rs.Close = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{LN}, {LN}}, R: "newline"}, + {S: [][]jsonic.Tin{{LN}}, R: "newline"}, + {S: [][]jsonic.Tin{{ZZ}}}, + {R: "record"}, + } + }) + + // ======= record rule ======= + j.Rule("record", func(rs *jsonic.RuleSpec) { + rs.Clear() + + rs.Open = []*jsonic.AltSpec{ + {P: "list"}, + } + + closeAlts := []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{ZZ}}}, + {S: [][]jsonic.Tin{{LN}, {ZZ}}, B: 1}, + } + if recordEmpty { + closeAlts = append(closeAlts, &jsonic.AltSpec{S: [][]jsonic.Tin{{LN}}, R: "record"}) + } else { + closeAlts = append(closeAlts, &jsonic.AltSpec{S: [][]jsonic.Tin{{LN}}, R: "newline"}) + } + rs.Close = closeAlts + + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + recordI, _ := ctx.Meta["recordI"].(int) + fields := ctx.Meta["fields"] + fieldNames := opts.fieldNames + + var fieldSlice []string + if fields != nil { + if fs, ok := fields.([]string); ok { + fieldSlice = fs + } + } + if fieldSlice == nil && fieldNames != nil { + fieldSlice = fieldNames + } + + // First line is fields if header=true + if recordI == 0 && header { + // Extract header names from child node + if childArr, ok := r.Child.Node.([]any); ok { + names := make([]string, len(childArr)) + for i, v := range childArr { + if s, ok := v.(string); ok { + names[i] = s + } else { + names[i] = "" + } + } + ctx.Meta["fields"] = names + } else { + ctx.Meta["fields"] = []string{} + } + } else { + // A normal record line + var rawRecord []any + if childArr, ok := r.Child.Node.([]any); ok { + rawRecord = childArr + } else { + rawRecord = []any{} + } + + if objres { + obj := make(map[string]any) + var keys []string + i := 0 + if fieldSlice != nil { + for fI := 0; fI < len(fieldSlice); fI++ { + var val any + if fI < len(rawRecord) { + val = rawRecord[fI] + } else { + val = opts.emptyField + } + obj[fieldSlice[fI]] = val + keys = append(keys, fieldSlice[fI]) + } + i = len(fieldSlice) + } + // Handle extra unnamed fields + for ; i < len(rawRecord); i++ { + fieldName := opts.noNamePrefix + strconv.Itoa(i) + val := rawRecord[i] + obj[fieldName] = val + keys = append(keys, fieldName) + } + + record := orderedMap{keys: keys, m: obj} + if stream != nil { + stream("record", record) + } else { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, record) + // Propagate updated slice up through parent chain + // (Go slices may reallocate on append) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + } + } else { + // Return records as arrays + for i := 0; i < len(rawRecord); i++ { + if rawRecord[i] == nil { + rawRecord[i] = opts.emptyField + } + } + if stream != nil { + stream("record", rawRecord) + } else { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, rawRecord) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + } + } + } + ctx.Meta["recordI"] = recordI + 1 + }) + }) + + // ======= list rule ======= + j.Rule("list", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = make([]any, 0) + }) + rs.Open = []*jsonic.AltSpec{ + // If at end of line, backtrack (empty record) + {S: [][]jsonic.Tin{{LN}}, B: 1}, + // Otherwise, start parsing elements + {P: "elem"}, + } + rs.Close = []*jsonic.AltSpec{ + // LN ends record + {S: [][]jsonic.Tin{{LN}}, B: 1}, + {S: [][]jsonic.Tin{{ZZ}}}, + } + }) + + // ======= elem rule ======= + j.Rule("elem", func(rs *jsonic.RuleSpec) { + rs.Clear() + + rs.Open = []*jsonic.AltSpec{ + // An empty element (comma without value before it) + {S: [][]jsonic.Tin{{CA}}, B: 1, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, opts.emptyField) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + r.U["done"] = true + }}, + // Normal element - delegate to val + {P: "val"}, + } + + rs.Close = []*jsonic.AltSpec{ + // An empty element at the end of the line: CA followed by LN or ZZ + {S: [][]jsonic.Tin{{CA}, {LN, ZZ}}, B: 1, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, opts.emptyField) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + }}, + // Comma means next element + {S: [][]jsonic.Tin{{CA}}, R: "elem"}, + // LN ends record + {S: [][]jsonic.Tin{{LN}}, B: 1}, + // EOF ends record + {S: [][]jsonic.Tin{{ZZ}}}, + } + + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + done, _ := r.U["done"].(bool) + if !done && !jsonic.IsUndefined(r.Child.Node) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, r.Child.Node) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + } + }) + }) + + // ======= val rule ======= + j.Rule("val", func(rs *jsonic.RuleSpec) { + rs.Clear() + + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = jsonic.Undefined + }) + + rs.Open = []*jsonic.AltSpec{ + // Handle text and space concatenation + {S: [][]jsonic.Tin{VAL, {SP}}, B: 2, P: "text"}, + {S: [][]jsonic.Tin{{SP}}, B: 1, P: "text"}, + // Plain value (no trailing space) + {S: [][]jsonic.Tin{VAL}}, + // LN ends record + {S: [][]jsonic.Tin{{LN}}, B: 1}, + } + + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if jsonic.IsUndefined(r.Node) { + if jsonic.IsUndefined(r.Child.Node) { + if r.OS == 0 { + r.Node = jsonic.Undefined + } else { + r.Node = r.O0.ResolveVal() + } + } else { + r.Node = r.Child.Node + } + } + }) + }) + + // ======= text rule ======= + j.Rule("text", func(rs *jsonic.RuleSpec) { + rs.Clear() + + rs.Open = []*jsonic.AltSpec{ + // Space within non-space is preserved as part of text value + {S: [][]jsonic.Tin{VAL, {SP}}, B: 1, R: "text", + N: map[string]int{"text": 1}, + G: "csv,space,follows", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + result := val + tokenStr(r.O0) + r.Node = result + if textN == 1 { + // first text rule + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + }}, + + // SP VAL + {S: [][]jsonic.Tin{{SP}, VAL}, R: "text", + N: map[string]int{"text": 1}, + G: "csv,space,leads", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + spaceStr := "" + if textN >= 2 || !trim { + spaceStr = r.O0.Src + } + result := val + spaceStr + r.O1.Src + r.Node = result + if textN == 1 { + // first + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + }}, - // Convert result to []any for jsonic - out := make([]any, len(result)) - for i, r := range result { - out[i] = normalizeForJsonic(r) + // SP [CA, LN, ZZ] - trailing space + {S: [][]jsonic.Tin{{SP}, {CA, LN, ZZ}}, B: 1, + N: map[string]int{"text": 1}, + G: "csv,end", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + spaceStr := "" + if !trim { + spaceStr = r.O0.Src + } + result := val + spaceStr + r.Node = result + if textN == 1 { + // first + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + }}, + + // SP only + {S: [][]jsonic.Tin{{SP}}, + N: map[string]int{"text": 1}, + G: "csv,space", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if strict { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + spaceStr := "" + if !trim { + spaceStr = r.O0.Src + } + result := val + spaceStr + r.Node = result + if textN == 1 { + // first + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + } + }, + P: func() string { + if strict { + return "" + } + return "val" + }()}, + + // Accept anything after text + {}, } - tkn := lex.Token("#VL", jsonic.TinVL, any(out), src) - pnt.SI = len(src) // consume entire source - pnt.CI += len(src) - return tkn + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if !jsonic.IsUndefined(r.Child.Node) { + r.Parent.Node = r.Child.Node + } else { + r.Parent.Node = r.Node + } + }) }) } -// normalizeForJsonic converts internal types to standard Go types. -func normalizeForJsonic(v any) any { - switch val := v.(type) { - case orderedMap: - m := make(map[string]any) - for k, v := range val.m { - m[k] = normalizeForJsonic(v) - } - return m - case []any: - out := make([]any, len(val)) - for i, v := range val { - out[i] = normalizeForJsonic(v) - } - return out - default: - return v +// tokenStr gets the string value from a token (Val for ST, Src otherwise). +func tokenStr(t *jsonic.Token) string { + if t == nil || t.IsNoToken() { + return "" + } + if t.Tin == jsonic.TinST { + if s, ok := t.Val.(string); ok { + return s + } + } + return t.Src +} + +// buildCsvStringMatcher creates a custom string matcher for CSV-style +// double-quote escaping: "a""b" → a"b +func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { + quoteChar := opts.quote + return func(lex *jsonic.Lex) *jsonic.Token { + pnt := lex.Cursor() + src := lex.Src + sI := pnt.SI + srclen := len(src) + + if sI >= srclen { + return nil + } + + // Check if we're at a quote character + if !strings.HasPrefix(src[sI:], quoteChar) { + return nil + } + + q := quoteChar + qLen := len(q) + rI := pnt.RI + cI := pnt.CI + + sI += qLen // skip opening quote + cI += qLen + + var s strings.Builder + + for sI < srclen { + cI++ + + // Check for quote character + if strings.HasPrefix(src[sI:], q) { + sI += qLen + cI += qLen - 1 + + // Check for escaped quote (double quote) + if sI < srclen && strings.HasPrefix(src[sI:], q) { + s.WriteString(q) + sI += qLen + cI += qLen + continue + } + + // String finished + val := s.String() + ssrc := src[pnt.SI:sI] + tkn := lex.Token("#ST", jsonic.TinST, val, ssrc) + pnt.SI = sI + pnt.RI = rI + pnt.CI = cI + return tkn + } + + ch := src[sI] + + // Check for line characters (newlines in quoted fields) + cfg := j.Config() + if cfg.LineChars[rune(ch)] { + if cfg.RowChars[rune(ch)] { + rI++ + pnt.RI = rI + } + cI = 1 + s.WriteByte(ch) + sI++ + continue + } + + // Check for unprintable characters + if ch < 32 { + // Bad token + return nil + } + + // Body part of string - fast scan + bI := sI + qFirst := q[0] + for sI < srclen && src[sI] >= 32 && src[sI] != qFirst { + if cfg.LineChars[rune(src[sI])] { + break + } + sI++ + cI++ + } + cI-- + s.WriteString(src[bI:sI]) + } + + // Unterminated string + return nil + } +} + +// orderedMap maintains insertion order for JSON serialization comparison. +type orderedMap struct { + keys []string + m map[string]any +} + +// Parse parses CSV text with the given options, using the jsonic grammar. +func Parse(src string, opts ...CsvOptions) ([]any, error) { + var o CsvOptions + if len(opts) > 0 { + o = opts[0] + } + + j := MakeJsonic(o) + result, err := j.Parse(src) + if err != nil { + return nil, err } + + if result == nil { + return []any{}, nil + } + + if arr, ok := result.([]any); ok { + return arr, nil + } + + return []any{}, nil } // MakeJsonic creates a jsonic instance configured for CSV parsing. -// This is the recommended way to create a CSV-parsing jsonic instance. -// -// Usage: -// -// j := csv.MakeJsonic(csv.CsvOptions{...}) -// result, err := j.Parse("a,b\n1,2") func MakeJsonic(opts ...CsvOptions) *jsonic.Jsonic { var o CsvOptions if len(opts) > 0 { o = opts[0] } - j := jsonic.Make(jsonic.Options{ - Parser: &jsonic.ParserOptions{ - Start: func(src string, j *jsonic.Jsonic, meta map[string]any) (any, error) { - result, err := Parse(src, o) - if err != nil { - return nil, err - } - out := make([]any, len(result)) - for i, r := range result { - out[i] = normalizeForJsonic(r) - } - return out, nil - }, + r := resolve(&o) + + jopts := jsonic.Options{ + Rule: &jsonic.RuleOptions{ + Start: "csv", + }, + Number: &jsonic.NumberOptions{ + Lex: boolPtr(r.number), + }, + Value: &jsonic.ValueOptions{ + Lex: boolPtr(r.value), + }, + Comment: &jsonic.CommentOptions{ + Lex: boolPtr(r.comment), }, Lex: &jsonic.LexOptions{ EmptyResult: []any{}, }, - }) + } + + if r.recordSep != "" { + jopts.Line = &jsonic.LineOptions{ + Chars: r.recordSep, + RowChars: r.recordSep, + } + } + + j := jsonic.Make(jopts) + + // Convert CsvOptions to map for plugin + pluginMap := optionsToMap(&o) + j.Use(Csv, pluginMap) return j } +func boolPtr(b bool) *bool { + return &b +} + +// optionsToMap converts CsvOptions to a map[string]any for the plugin interface. +func optionsToMap(o *CsvOptions) map[string]any { + m := make(map[string]any) + if o.Object != nil { + m["object"] = *o.Object + } + if o.Header != nil { + m["header"] = *o.Header + } + if o.Trim != nil { + m["trim"] = *o.Trim + } + if o.Comment != nil { + m["comment"] = *o.Comment + } + if o.Number != nil { + m["number"] = *o.Number + } + if o.Value != nil { + m["value"] = *o.Value + } + if o.Strict != nil { + m["strict"] = *o.Strict + } + if o.Field != nil { + fm := make(map[string]any) + if o.Field.Separation != "" { + fm["separation"] = o.Field.Separation + } + if o.Field.NonamePrefix != "" { + fm["nonameprefix"] = o.Field.NonamePrefix + } + fm["empty"] = o.Field.Empty + if o.Field.Exact { + fm["exact"] = true + } + if o.Field.Names != nil { + fm["names"] = o.Field.Names + } + m["field"] = fm + } + if o.Record != nil { + rm := make(map[string]any) + if o.Record.Separators != "" { + rm["separators"] = o.Record.Separators + } + if o.Record.Empty { + rm["empty"] = true + } + m["record"] = rm + } + if o.String != nil { + sm := make(map[string]any) + if o.String.Quote != "" { + sm["quote"] = o.String.Quote + } + if o.String.Csv != nil { + sm["csv"] = *o.String.Csv + } + m["string"] = sm + } + if o.Stream != nil { + m["_stream"] = o.Stream + } + return m +} + // mapToOptions converts a map[string]any (plugin options) to CsvOptions. func mapToOptions(m map[string]any) CsvOptions { var o CsvOptions @@ -170,6 +903,20 @@ func mapToOptions(m map[string]any) CsvOptions { } } + if sm, ok := m["string"].(map[string]any); ok { + o.String = &StringOptions{} + if v, ok := sm["quote"].(string); ok { + o.String.Quote = v + } + if v, ok := sm["csv"].(bool); ok { + o.String.Csv = &v + } + } + + if v, ok := m["_stream"].(StreamFunc); ok { + o.Stream = v + } + return o } From 0efe3df54e0bbcb6f1e75d762c2f5e43439f3dbc Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 15:53:38 +0000 Subject: [PATCH 2/4] Update CI to build only for Node 24 and use latest action versions https://claude.ai/code/session_01Ehie1ims4jJtda9A6MmD1x --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b81424e..1f51abe 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,14 +16,14 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - node-version: [18.x, 20.x, 22.x] + node-version: [24.x] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Use Node.js ${{ matrix.node-version }} - uses: actions/setup-node@v1 + uses: actions/setup-node@v4 with: node-version: ${{ matrix.node-version }} - run: npm i From 715c20dd62b1a495fead07c565746712f1d2cca0 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 16:00:52 +0000 Subject: [PATCH 3/4] Fix merge conflict markers in trim.json fixture file The trim.json fixture had unresolved git merge conflict markers from the merge of main, causing the TS fixtures test to fail with a JSON parse error. https://claude.ai/code/session_01Ehie1ims4jJtda9A6MmD1x --- test/fixtures/trim.json | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/fixtures/trim.json b/test/fixtures/trim.json index 6f68056..f8f2ee7 100644 --- a/test/fixtures/trim.json +++ b/test/fixtures/trim.json @@ -1,4 +1,3 @@ -<<<<<<< HEAD [ { "a": "1", @@ -21,6 +20,3 @@ "c": "66" } ] -======= -[{"a":"hello","b":"world"},{"a":"foo","b":"bar"}] ->>>>>>> 368b77c70d6f777273b572d66ea6ca2070e462ab From 7361315ed1e56969aad30bbe9e36a11ceab0f4e6 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 16:09:29 +0000 Subject: [PATCH 4/4] Add .gitattributes to preserve line endings in test fixtures On Windows, git's core.autocrlf converts \n to \r\n on checkout, which breaks CSV fixture tests that depend on specific line endings inside quoted fields (e.g. papa-quoted-field-with-line-break). Mark fixture files as binary to prevent line ending conversion. https://claude.ai/code/session_01Ehie1ims4jJtda9A6MmD1x --- .gitattributes | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..403b259 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +# Preserve line endings in test fixtures - some tests depend on specific line endings +test/fixtures/*.csv binary +test/fixtures/*.json binary