diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..403b259 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +# Preserve line endings in test fixtures - some tests depend on specific line endings +test/fixtures/*.csv binary +test/fixtures/*.json binary diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b81424e..1f51abe 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,14 +16,14 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - node-version: [18.x, 20.x, 22.x] + node-version: [24.x] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Use Node.js ${{ matrix.node-version }} - uses: actions/setup-node@v1 + uses: actions/setup-node@v4 with: node-version: ${{ matrix.node-version }} - run: npm i diff --git a/go/csv.go b/go/csv.go index 12440ea..c142573 100644 --- a/go/csv.go +++ b/go/csv.go @@ -1,23 +1,18 @@ package csv -import ( - "strconv" - "strings" - "unicode" -) - // CsvOptions configures the CSV parser. type CsvOptions struct { - Object *bool // Return objects (default true) or arrays - Header *bool // First row is header (default true) - Trim *bool // Trim whitespace from values - Comment *bool // Enable # comments - Number *bool // Parse numeric values - Value *bool // Parse true/false/null - Strict *bool // Strict CSV mode (default true) - Field *FieldOptions // Field options - Record *RecordOptions // Record options - Stream StreamFunc // Streaming callback + Object *bool // Return objects (default true) or arrays + Header *bool // First row is header (default true) + Trim *bool // Trim whitespace from values + Comment *bool // Enable # comments + Number *bool // Parse numeric values + Value *bool // Parse true/false/null + Strict *bool // Strict CSV mode (default true) + Field *FieldOptions // Field options + Record *RecordOptions // Record options + String *StringOptions // String options + Stream StreamFunc // Streaming callback } // FieldOptions configures field handling. @@ -35,6 +30,12 @@ type RecordOptions struct { Empty bool // Preserve empty records (default false) } +// StringOptions configures string handling. +type StringOptions struct { + Quote string // Quote character (default `"`) + Csv *bool // Force CSV string mode +} + // StreamFunc is a callback for streaming CSV parsing. type StreamFunc func(what string, record any) @@ -54,7 +55,8 @@ type resolved struct { fieldExact bool recordSep string recordEmpty bool - quote byte + quote string + csvString *bool stream StreamFunc } @@ -73,10 +75,10 @@ func resolve(o *CsvOptions) *resolved { strict: strict, noNamePrefix: "field~", emptyField: "", - quote: '"', + quote: `"`, } - // In non-strict mode, trim/comment/number default to true + // In non-strict mode, trim/comment/number/value default to true if strict { r.trim = boolOpt(o.Trim, false) r.comment = boolOpt(o.Comment, false) @@ -107,367 +109,13 @@ func resolve(o *CsvOptions) *resolved { r.recordEmpty = o.Record.Empty } - r.stream = o.Stream - return r -} - -// Parse parses CSV text with the given options. -func Parse(src string, opts ...CsvOptions) ([]any, error) { - var o CsvOptions - if len(opts) > 0 { - o = opts[0] - } - r := resolve(&o) - return parseCSV(src, r) -} - -// parser holds parsing state. -type parser struct { - src string - pos int - opts *resolved -} - -func parseCSV(src string, opts *resolved) ([]any, error) { - if opts.stream != nil { - opts.stream("start", nil) - } - - p := &parser{src: src, pos: 0, opts: opts} - var result []any - - // Parse all raw records - var headers []string - recordIndex := 0 - - for p.pos <= len(p.src) { - fields, isEOF := p.parseRecord() - - // Check if this is an empty record - isEmpty := len(fields) == 0 || (len(fields) == 1 && fields[0] == "") - - if isEmpty && !isEOF { - if recordIndex == 0 { - // Skip leading empty lines (before header) - continue - } - if !opts.recordEmpty { - continue - } - // With empty records enabled, create a record with empty fields - if opts.header && headers != nil { - fields = make([]string, 1) - fields[0] = "" - } - } - - if isEmpty && isEOF { - break - } - - if recordIndex == 0 && opts.header { - // First non-empty record is the header - headers = fields - recordIndex++ - continue - } - - // Build the record - record := buildRecord(fields, headers, opts, recordIndex) - if opts.stream != nil { - opts.stream("record", record) - } else { - result = append(result, record) + if o.String != nil { + if o.String.Quote != "" { + r.quote = o.String.Quote } - recordIndex++ - - if isEOF { - break - } - } - - if result == nil { - result = []any{} + r.csvString = o.String.Csv } - if opts.stream != nil { - opts.stream("end", nil) - } - - return result, nil -} - -// parseRecord parses one record from the current position. -// Returns the fields and whether EOF was reached. -func (p *parser) parseRecord() ([]string, bool) { - if p.pos >= len(p.src) { - return nil, true - } - - var fields []string - isEOF := false - - for { - field, term := p.parseField() - fields = append(fields, field) - - switch term { - case termFieldSep: - // Continue to next field - continue - case termRecordSep: - return fields, false - case termEOF: - isEOF = true - return fields, isEOF - } - } -} - -type terminator int - -const ( - termFieldSep terminator = iota - termRecordSep - termEOF -) - -// parseField parses one field value from the current position. -func (p *parser) parseField() (string, terminator) { - if p.pos >= len(p.src) { - return "", termEOF - } - - // Check if we're at a record separator - if t := p.atRecordSep(); t > 0 { - p.pos += t - return "", termRecordSep - } - - // Check for quoted field - if p.src[p.pos] == p.opts.quote { - return p.parseQuotedField() - } - - return p.parseUnquotedField() -} - -// parseQuotedField parses a quoted field (RFC 4180 style). -func (p *parser) parseQuotedField() (string, terminator) { - quote := p.opts.quote - p.pos++ // skip opening quote - var sb strings.Builder - - for p.pos < len(p.src) { - ch := p.src[p.pos] - if ch == quote { - p.pos++ - // Check for escaped quote (double quote) - if p.pos < len(p.src) && p.src[p.pos] == quote { - sb.WriteByte(quote) - p.pos++ - continue - } - // End of quoted field - skip to next separator - return p.skipToSeparator(sb.String()) - } - sb.WriteByte(ch) - p.pos++ - } - - // Unterminated quote - return what we have - return sb.String(), termEOF -} - -// skipToSeparator skips any content after closing quote until the next separator. -func (p *parser) skipToSeparator(val string) (string, terminator) { - for p.pos < len(p.src) { - // Check for field separator - if strings.HasPrefix(p.src[p.pos:], p.opts.fieldSep) { - p.pos += len(p.opts.fieldSep) - return val, termFieldSep - } - // Check for record separator - if t := p.atRecordSep(); t > 0 { - p.pos += t - return val, termRecordSep - } - // Skip any other character (non-standard content after closing quote) - p.pos++ - } - return val, termEOF -} - -// parseUnquotedField parses an unquoted field value. -func (p *parser) parseUnquotedField() (string, terminator) { - start := p.pos - - for p.pos < len(p.src) { - // Check for comment - if p.opts.comment && p.src[p.pos] == '#' { - val := p.src[start:p.pos] - // Skip to end of line (or record separator) - p.skipToRecordEnd() - return val, termRecordSep - } - - // Check for field separator - if strings.HasPrefix(p.src[p.pos:], p.opts.fieldSep) { - val := p.src[start:p.pos] - p.pos += len(p.opts.fieldSep) - return val, termFieldSep - } - - // Check for record separator - if t := p.atRecordSep(); t > 0 { - val := p.src[start:p.pos] - p.pos += t - return val, termRecordSep - } - - p.pos++ - } - - val := p.src[start:p.pos] - return val, termEOF -} - -// skipToRecordEnd skips to the end of the current record (for comments). -func (p *parser) skipToRecordEnd() { - for p.pos < len(p.src) { - if t := p.atRecordSep(); t > 0 { - p.pos += t - return - } - p.pos++ - } -} - -// atRecordSep checks if the current position is at a record separator. -// Returns the number of bytes to skip, or 0 if not at a separator. -func (p *parser) atRecordSep() int { - if p.pos >= len(p.src) { - return 0 - } - - if p.opts.recordSep != "" { - // Custom record separator - if strings.HasPrefix(p.src[p.pos:], p.opts.recordSep) { - return len(p.opts.recordSep) - } - return 0 - } - - // Default: \r\n or \n or \r - if p.src[p.pos] == '\r' { - if p.pos+1 < len(p.src) && p.src[p.pos+1] == '\n' { - return 2 - } - return 1 - } - if p.src[p.pos] == '\n' { - return 1 - } - return 0 -} - -// buildRecord converts raw field strings into the output format. -func buildRecord(fields []string, headers []string, opts *resolved, recordIndex int) any { - // Apply transformations to field values - processed := make([]any, len(fields)) - for i, f := range fields { - processed[i] = transformValue(f, opts) - } - - if !opts.object { - return processed - } - - // Build object - obj := make(map[string]any) - // Use ordered keys to maintain insertion order - var keys []string - - nameSource := headers - if !opts.header && opts.fieldNames != nil { - nameSource = opts.fieldNames - } - - if nameSource != nil { - for i := 0; i < len(nameSource) && i < len(processed); i++ { - key := nameSource[i] - obj[key] = processed[i] - keys = append(keys, key) - } - // Extra fields beyond named ones - for i := len(nameSource); i < len(processed); i++ { - key := opts.noNamePrefix + strconv.Itoa(i) - obj[key] = processed[i] - keys = append(keys, key) - } - } else { - // No names - use prefix - for i := 0; i < len(processed); i++ { - key := opts.noNamePrefix + strconv.Itoa(i) - obj[key] = processed[i] - keys = append(keys, key) - } - } - - // Fill missing fields with empty value - if nameSource != nil { - for i := len(processed); i < len(nameSource); i++ { - obj[nameSource[i]] = opts.emptyField - } - } - - return orderedMap{keys: keys, m: obj} -} - -// orderedMap maintains insertion order for JSON serialization comparison. -type orderedMap struct { - keys []string - m map[string]any -} - -// transformValue applies trim, number, and value conversions. -func transformValue(s string, opts *resolved) any { - if opts.trim { - s = strings.TrimFunc(s, unicode.IsSpace) - } - - if opts.value { - switch s { - case "true": - return true - case "false": - return false - case "null": - return nil - } - } - - if opts.number { - if n, ok := parseNumber(s); ok { - return n - } - } - - return s -} - -// parseNumber tries to parse a string as a number. -func parseNumber(s string) (float64, bool) { - if s == "" { - return 0, false - } - f, err := strconv.ParseFloat(s, 64) - if err != nil { - return 0, false - } - // Return integer if it's a whole number - if f == float64(int64(f)) && !strings.Contains(s, ".") { - return f, true - } - return f, true + r.stream = o.Stream + return r } diff --git a/go/plugin.go b/go/plugin.go index b78becf..c0de518 100644 --- a/go/plugin.go +++ b/go/plugin.go @@ -1,103 +1,836 @@ package csv import ( + "strconv" + "strings" + jsonic "github.com/jsonicjs/jsonic/go" ) // Csv is a jsonic plugin that adds CSV parsing support. -// It adds a high-priority custom matcher that consumes the entire source -// and produces the CSV-parsed result as a single value token. -// -// Usage: -// -// j := jsonic.Make() -// j.Use(Csv, map[string]any{"header": true}) -// result, err := j.Parse("a,b\n1,2") +// It mirrors the TypeScript Csv plugin, defining grammar rules +// (csv, newline, record, list, elem, val, text) and a custom +// CSV string matcher. func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { csvOpts := mapToOptions(pluginOpts) + opts := resolve(&csvOpts) + + strict := opts.strict + objres := opts.object + header := opts.header + trim := opts.trim + comment := opts.comment + optNumber := opts.number + optValue := opts.value + recordEmpty := opts.recordEmpty + stream := opts.stream + + // In strict mode, disable JSON structure tokens and Jsonic field content parsing. + if strict { + useCsvString := true + if opts.csvString != nil && !*opts.csvString { + useCsvString = false + } + if useCsvString { + j.AddMatcher("stringcsv", 100000, buildCsvStringMatcher(opts, j)) + } + // Disable JSON structure tokens in strict mode. + cfg := j.Config() + delete(cfg.FixedTokens, "{") + delete(cfg.FixedTokens, "}") + delete(cfg.FixedTokens, "[") + delete(cfg.FixedTokens, "]") + delete(cfg.FixedTokens, ":") + cfg.SortFixedTokens() + + // Exclude jsonic and imp rule groups. + j.Exclude("jsonic", "imp") + } else { + useCsvString := false + if opts.csvString != nil && *opts.csvString { + useCsvString = true + } + if useCsvString { + j.AddMatcher("stringcsv", 100000, buildCsvStringMatcher(opts, j)) + } + if csvOpts.Trim == nil { + trim = true + } + if csvOpts.Comment == nil { + comment = true + } + if csvOpts.Number == nil { + optNumber = true + } + j.Exclude("imp") + } - // Add a high-priority matcher that consumes the entire source - // and produces a single value token containing the parsed CSV result. - j.AddMatcher("csv", 1000, func(lex *jsonic.Lex) *jsonic.Token { + // Custom "comma" (field separator) + if opts.fieldSep != "," { + cfg := j.Config() + // Remove old comma mapping + delete(cfg.FixedTokens, ",") + // Add custom separator + j.Token("#CA", opts.fieldSep) + cfg.SortFixedTokens() + } + + cfg := j.Config() + + // Configure number/value/comment lexing + cfg.NumberLex = optNumber + cfg.ValueLex = optValue + cfg.CommentLex = comment + + // When comments are disabled, clear comment line starters so the text matcher + // doesn't stop at '#' or '//'. Otherwise '#' becomes unmatchable. + if !comment { + cfg.CommentLine = nil + cfg.CommentBlock = nil + } + + // Set start rule + cfg.RuleStart = "csv" + + if opts.recordSep != "" { + cfg.LineChars = make(map[rune]bool) + cfg.RowChars = make(map[rune]bool) + for _, ch := range opts.recordSep { + cfg.LineChars[ch] = true + cfg.RowChars[ch] = true + } + } + + // Register custom token types that are NOT in jsonic's global IGNORE set. + // In the TS version, the IGNORE set is configurable per-instance. + // In Go, TinSetIGNORE is a global map, so we use custom tokens instead. + RL := j.Token("#RL") // Record Line (non-ignored LN equivalent) + RS := j.Token("#RS") // Record Space (non-ignored SP equivalent) + + // Intercept the line matcher: emit #RL instead of #LN so it's not ignored. + // Each line ending (\n or \r\n) is emitted as a separate token so the grammar + // can distinguish multiple newlines (important for empty record handling). + cfg.LineCheck = func(lex *jsonic.Lex) *jsonic.LexCheckResult { pnt := lex.Cursor() - if pnt.SI != 0 { - return nil // Only match at start of source + src := lex.Src + sI := pnt.SI + rI := pnt.RI + if sI >= pnt.Len { + return nil + } + if !cfg.LineChars[rune(src[sI])] { + return nil + } + startI := sI + // Consume one line ending: \r\n or \r or \n + if src[sI] == '\r' { + sI++ + if sI < pnt.Len && src[sI] == '\n' { + sI++ + } + rI++ + } else if cfg.LineChars[rune(src[sI])] { + if cfg.RowChars[rune(src[sI])] { + rI++ + } + sI++ } + tkn := lex.Token("#RL", RL, nil, src[startI:sI]) + pnt.SI = sI + pnt.RI = rI + pnt.CI = 1 + return &jsonic.LexCheckResult{Done: true, Token: tkn} + } + // In strict mode, also intercept space to emit #RS. + // In non-strict mode, spaces are handled by the grammar too. + cfg.SpaceCheck = func(lex *jsonic.Lex) *jsonic.LexCheckResult { + pnt := lex.Cursor() src := lex.Src - result, err := Parse(src, csvOpts) - if err != nil { + sI := pnt.SI + cI := pnt.CI + if sI >= pnt.Len { + return nil + } + if !cfg.SpaceChars[rune(src[sI])] { return nil } + startI := sI + for sI < pnt.Len && cfg.SpaceChars[rune(src[sI])] { + sI++ + cI++ + } + tkn := lex.Token("#RS", RS, nil, src[startI:sI]) + pnt.SI = sI + pnt.CI = cI + return &jsonic.LexCheckResult{Done: true, Token: tkn} + } + + // Get token Tins - use our custom non-ignored tokens + LN := RL // Use RL (non-ignored) instead of LN (ignored) + CA := j.Token("#CA") + SP := RS // Use RS (non-ignored) instead of SP (ignored) + ZZ := j.Token("#ZZ") + VAL := j.TokenSet("VAL") // [TX, NR, ST, VL] + + // ======= csv rule (starting rule) ======= + j.Rule("csv", func(rs *jsonic.RuleSpec) { + rs.Clear() + + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + if ctx.Meta == nil { + ctx.Meta = make(map[string]any) + } + ctx.Meta["recordI"] = 0 + if stream != nil { + stream("start", nil) + } + r.Node = make([]any, 0) + }) + + openAlts := []*jsonic.AltSpec{ + // End immediately if EOF + {S: [][]jsonic.Tin{{ZZ}}}, + } + // Ignore empty lines from the start (if not preserving empty records) + if !recordEmpty { + openAlts = append(openAlts, &jsonic.AltSpec{S: [][]jsonic.Tin{{LN}}, P: "newline"}) + } + // Look for the first record + openAlts = append(openAlts, &jsonic.AltSpec{P: "record"}) + rs.Open = openAlts + + rs.AddAC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if stream != nil { + stream("end", nil) + } + }) + }) + + // ======= newline rule ======= + j.Rule("newline", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.Open = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{LN}, {LN}}, R: "newline"}, + {S: [][]jsonic.Tin{{LN}}, R: "newline"}, + {S: [][]jsonic.Tin{{ZZ}}}, + {R: "record"}, + } + rs.Close = []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{LN}, {LN}}, R: "newline"}, + {S: [][]jsonic.Tin{{LN}}, R: "newline"}, + {S: [][]jsonic.Tin{{ZZ}}}, + {R: "record"}, + } + }) + + // ======= record rule ======= + j.Rule("record", func(rs *jsonic.RuleSpec) { + rs.Clear() + + rs.Open = []*jsonic.AltSpec{ + {P: "list"}, + } + + closeAlts := []*jsonic.AltSpec{ + {S: [][]jsonic.Tin{{ZZ}}}, + {S: [][]jsonic.Tin{{LN}, {ZZ}}, B: 1}, + } + if recordEmpty { + closeAlts = append(closeAlts, &jsonic.AltSpec{S: [][]jsonic.Tin{{LN}}, R: "record"}) + } else { + closeAlts = append(closeAlts, &jsonic.AltSpec{S: [][]jsonic.Tin{{LN}}, R: "newline"}) + } + rs.Close = closeAlts + + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + recordI, _ := ctx.Meta["recordI"].(int) + fields := ctx.Meta["fields"] + fieldNames := opts.fieldNames + + var fieldSlice []string + if fields != nil { + if fs, ok := fields.([]string); ok { + fieldSlice = fs + } + } + if fieldSlice == nil && fieldNames != nil { + fieldSlice = fieldNames + } + + // First line is fields if header=true + if recordI == 0 && header { + // Extract header names from child node + if childArr, ok := r.Child.Node.([]any); ok { + names := make([]string, len(childArr)) + for i, v := range childArr { + if s, ok := v.(string); ok { + names[i] = s + } else { + names[i] = "" + } + } + ctx.Meta["fields"] = names + } else { + ctx.Meta["fields"] = []string{} + } + } else { + // A normal record line + var rawRecord []any + if childArr, ok := r.Child.Node.([]any); ok { + rawRecord = childArr + } else { + rawRecord = []any{} + } + + if objres { + obj := make(map[string]any) + var keys []string + i := 0 + if fieldSlice != nil { + for fI := 0; fI < len(fieldSlice); fI++ { + var val any + if fI < len(rawRecord) { + val = rawRecord[fI] + } else { + val = opts.emptyField + } + obj[fieldSlice[fI]] = val + keys = append(keys, fieldSlice[fI]) + } + i = len(fieldSlice) + } + // Handle extra unnamed fields + for ; i < len(rawRecord); i++ { + fieldName := opts.noNamePrefix + strconv.Itoa(i) + val := rawRecord[i] + obj[fieldName] = val + keys = append(keys, fieldName) + } + + record := orderedMap{keys: keys, m: obj} + if stream != nil { + stream("record", record) + } else { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, record) + // Propagate updated slice up through parent chain + // (Go slices may reallocate on append) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + } + } else { + // Return records as arrays + for i := 0; i < len(rawRecord); i++ { + if rawRecord[i] == nil { + rawRecord[i] = opts.emptyField + } + } + if stream != nil { + stream("record", rawRecord) + } else { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, rawRecord) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + } + } + } + ctx.Meta["recordI"] = recordI + 1 + }) + }) + + // ======= list rule ======= + j.Rule("list", func(rs *jsonic.RuleSpec) { + rs.Clear() + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = make([]any, 0) + }) + rs.Open = []*jsonic.AltSpec{ + // If at end of line, backtrack (empty record) + {S: [][]jsonic.Tin{{LN}}, B: 1}, + // Otherwise, start parsing elements + {P: "elem"}, + } + rs.Close = []*jsonic.AltSpec{ + // LN ends record + {S: [][]jsonic.Tin{{LN}}, B: 1}, + {S: [][]jsonic.Tin{{ZZ}}}, + } + }) + + // ======= elem rule ======= + j.Rule("elem", func(rs *jsonic.RuleSpec) { + rs.Clear() + + rs.Open = []*jsonic.AltSpec{ + // An empty element (comma without value before it) + {S: [][]jsonic.Tin{{CA}}, B: 1, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, opts.emptyField) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + r.U["done"] = true + }}, + // Normal element - delegate to val + {P: "val"}, + } + + rs.Close = []*jsonic.AltSpec{ + // An empty element at the end of the line: CA followed by LN or ZZ + {S: [][]jsonic.Tin{{CA}, {LN, ZZ}}, B: 1, + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, opts.emptyField) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + }}, + // Comma means next element + {S: [][]jsonic.Tin{{CA}}, R: "elem"}, + // LN ends record + {S: [][]jsonic.Tin{{LN}}, B: 1}, + // EOF ends record + {S: [][]jsonic.Tin{{ZZ}}}, + } + + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + done, _ := r.U["done"].(bool) + if !done && !jsonic.IsUndefined(r.Child.Node) { + if arr, ok := r.Node.([]any); ok { + r.Node = append(arr, r.Child.Node) + if r.Parent != jsonic.NoRule && r.Parent != nil { + r.Parent.Node = r.Node + } + } + } + }) + }) + + // ======= val rule ======= + j.Rule("val", func(rs *jsonic.RuleSpec) { + rs.Clear() + + rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { + r.Node = jsonic.Undefined + }) + + rs.Open = []*jsonic.AltSpec{ + // Handle text and space concatenation + {S: [][]jsonic.Tin{VAL, {SP}}, B: 2, P: "text"}, + {S: [][]jsonic.Tin{{SP}}, B: 1, P: "text"}, + // Plain value (no trailing space) + {S: [][]jsonic.Tin{VAL}}, + // LN ends record + {S: [][]jsonic.Tin{{LN}}, B: 1}, + } + + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if jsonic.IsUndefined(r.Node) { + if jsonic.IsUndefined(r.Child.Node) { + if r.OS == 0 { + r.Node = jsonic.Undefined + } else { + r.Node = r.O0.ResolveVal() + } + } else { + r.Node = r.Child.Node + } + } + }) + }) + + // ======= text rule ======= + j.Rule("text", func(rs *jsonic.RuleSpec) { + rs.Clear() + + rs.Open = []*jsonic.AltSpec{ + // Space within non-space is preserved as part of text value + {S: [][]jsonic.Tin{VAL, {SP}}, B: 1, R: "text", + N: map[string]int{"text": 1}, + G: "csv,space,follows", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + result := val + tokenStr(r.O0) + r.Node = result + if textN == 1 { + // first text rule + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + }}, + + // SP VAL + {S: [][]jsonic.Tin{{SP}, VAL}, R: "text", + N: map[string]int{"text": 1}, + G: "csv,space,leads", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + spaceStr := "" + if textN >= 2 || !trim { + spaceStr = r.O0.Src + } + result := val + spaceStr + r.O1.Src + r.Node = result + if textN == 1 { + // first + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + }}, - // Convert result to []any for jsonic - out := make([]any, len(result)) - for i, r := range result { - out[i] = normalizeForJsonic(r) + // SP [CA, LN, ZZ] - trailing space + {S: [][]jsonic.Tin{{SP}, {CA, LN, ZZ}}, B: 1, + N: map[string]int{"text": 1}, + G: "csv,end", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + spaceStr := "" + if !trim { + spaceStr = r.O0.Src + } + result := val + spaceStr + r.Node = result + if textN == 1 { + // first + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + }}, + + // SP only + {S: [][]jsonic.Tin{{SP}}, + N: map[string]int{"text": 1}, + G: "csv,space", + A: func(r *jsonic.Rule, ctx *jsonic.Context) { + if strict { + textN := r.N["text"] + var val string + if textN == 1 { + val = "" + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + if s, ok := r.Prev.Node.(string); ok { + val = s + } + } + spaceStr := "" + if !trim { + spaceStr = r.O0.Src + } + result := val + spaceStr + r.Node = result + if textN == 1 { + // first + } else if r.Prev != nil && r.Prev != jsonic.NoRule { + r.Prev.Node = result + } + } + }, + P: func() string { + if strict { + return "" + } + return "val" + }()}, + + // Accept anything after text + {}, } - tkn := lex.Token("#VL", jsonic.TinVL, any(out), src) - pnt.SI = len(src) // consume entire source - pnt.CI += len(src) - return tkn + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if !jsonic.IsUndefined(r.Child.Node) { + r.Parent.Node = r.Child.Node + } else { + r.Parent.Node = r.Node + } + }) }) } -// normalizeForJsonic converts internal types to standard Go types. -func normalizeForJsonic(v any) any { - switch val := v.(type) { - case orderedMap: - m := make(map[string]any) - for k, v := range val.m { - m[k] = normalizeForJsonic(v) - } - return m - case []any: - out := make([]any, len(val)) - for i, v := range val { - out[i] = normalizeForJsonic(v) - } - return out - default: - return v +// tokenStr gets the string value from a token (Val for ST, Src otherwise). +func tokenStr(t *jsonic.Token) string { + if t == nil || t.IsNoToken() { + return "" + } + if t.Tin == jsonic.TinST { + if s, ok := t.Val.(string); ok { + return s + } + } + return t.Src +} + +// buildCsvStringMatcher creates a custom string matcher for CSV-style +// double-quote escaping: "a""b" → a"b +func buildCsvStringMatcher(opts *resolved, j *jsonic.Jsonic) jsonic.LexMatcher { + quoteChar := opts.quote + return func(lex *jsonic.Lex) *jsonic.Token { + pnt := lex.Cursor() + src := lex.Src + sI := pnt.SI + srclen := len(src) + + if sI >= srclen { + return nil + } + + // Check if we're at a quote character + if !strings.HasPrefix(src[sI:], quoteChar) { + return nil + } + + q := quoteChar + qLen := len(q) + rI := pnt.RI + cI := pnt.CI + + sI += qLen // skip opening quote + cI += qLen + + var s strings.Builder + + for sI < srclen { + cI++ + + // Check for quote character + if strings.HasPrefix(src[sI:], q) { + sI += qLen + cI += qLen - 1 + + // Check for escaped quote (double quote) + if sI < srclen && strings.HasPrefix(src[sI:], q) { + s.WriteString(q) + sI += qLen + cI += qLen + continue + } + + // String finished + val := s.String() + ssrc := src[pnt.SI:sI] + tkn := lex.Token("#ST", jsonic.TinST, val, ssrc) + pnt.SI = sI + pnt.RI = rI + pnt.CI = cI + return tkn + } + + ch := src[sI] + + // Check for line characters (newlines in quoted fields) + cfg := j.Config() + if cfg.LineChars[rune(ch)] { + if cfg.RowChars[rune(ch)] { + rI++ + pnt.RI = rI + } + cI = 1 + s.WriteByte(ch) + sI++ + continue + } + + // Check for unprintable characters + if ch < 32 { + // Bad token + return nil + } + + // Body part of string - fast scan + bI := sI + qFirst := q[0] + for sI < srclen && src[sI] >= 32 && src[sI] != qFirst { + if cfg.LineChars[rune(src[sI])] { + break + } + sI++ + cI++ + } + cI-- + s.WriteString(src[bI:sI]) + } + + // Unterminated string + return nil + } +} + +// orderedMap maintains insertion order for JSON serialization comparison. +type orderedMap struct { + keys []string + m map[string]any +} + +// Parse parses CSV text with the given options, using the jsonic grammar. +func Parse(src string, opts ...CsvOptions) ([]any, error) { + var o CsvOptions + if len(opts) > 0 { + o = opts[0] + } + + j := MakeJsonic(o) + result, err := j.Parse(src) + if err != nil { + return nil, err } + + if result == nil { + return []any{}, nil + } + + if arr, ok := result.([]any); ok { + return arr, nil + } + + return []any{}, nil } // MakeJsonic creates a jsonic instance configured for CSV parsing. -// This is the recommended way to create a CSV-parsing jsonic instance. -// -// Usage: -// -// j := csv.MakeJsonic(csv.CsvOptions{...}) -// result, err := j.Parse("a,b\n1,2") func MakeJsonic(opts ...CsvOptions) *jsonic.Jsonic { var o CsvOptions if len(opts) > 0 { o = opts[0] } - j := jsonic.Make(jsonic.Options{ - Parser: &jsonic.ParserOptions{ - Start: func(src string, j *jsonic.Jsonic, meta map[string]any) (any, error) { - result, err := Parse(src, o) - if err != nil { - return nil, err - } - out := make([]any, len(result)) - for i, r := range result { - out[i] = normalizeForJsonic(r) - } - return out, nil - }, + r := resolve(&o) + + jopts := jsonic.Options{ + Rule: &jsonic.RuleOptions{ + Start: "csv", + }, + Number: &jsonic.NumberOptions{ + Lex: boolPtr(r.number), + }, + Value: &jsonic.ValueOptions{ + Lex: boolPtr(r.value), + }, + Comment: &jsonic.CommentOptions{ + Lex: boolPtr(r.comment), }, Lex: &jsonic.LexOptions{ EmptyResult: []any{}, }, - }) + } + + if r.recordSep != "" { + jopts.Line = &jsonic.LineOptions{ + Chars: r.recordSep, + RowChars: r.recordSep, + } + } + + j := jsonic.Make(jopts) + + // Convert CsvOptions to map for plugin + pluginMap := optionsToMap(&o) + j.Use(Csv, pluginMap) return j } +func boolPtr(b bool) *bool { + return &b +} + +// optionsToMap converts CsvOptions to a map[string]any for the plugin interface. +func optionsToMap(o *CsvOptions) map[string]any { + m := make(map[string]any) + if o.Object != nil { + m["object"] = *o.Object + } + if o.Header != nil { + m["header"] = *o.Header + } + if o.Trim != nil { + m["trim"] = *o.Trim + } + if o.Comment != nil { + m["comment"] = *o.Comment + } + if o.Number != nil { + m["number"] = *o.Number + } + if o.Value != nil { + m["value"] = *o.Value + } + if o.Strict != nil { + m["strict"] = *o.Strict + } + if o.Field != nil { + fm := make(map[string]any) + if o.Field.Separation != "" { + fm["separation"] = o.Field.Separation + } + if o.Field.NonamePrefix != "" { + fm["nonameprefix"] = o.Field.NonamePrefix + } + fm["empty"] = o.Field.Empty + if o.Field.Exact { + fm["exact"] = true + } + if o.Field.Names != nil { + fm["names"] = o.Field.Names + } + m["field"] = fm + } + if o.Record != nil { + rm := make(map[string]any) + if o.Record.Separators != "" { + rm["separators"] = o.Record.Separators + } + if o.Record.Empty { + rm["empty"] = true + } + m["record"] = rm + } + if o.String != nil { + sm := make(map[string]any) + if o.String.Quote != "" { + sm["quote"] = o.String.Quote + } + if o.String.Csv != nil { + sm["csv"] = *o.String.Csv + } + m["string"] = sm + } + if o.Stream != nil { + m["_stream"] = o.Stream + } + return m +} + // mapToOptions converts a map[string]any (plugin options) to CsvOptions. func mapToOptions(m map[string]any) CsvOptions { var o CsvOptions @@ -170,6 +903,20 @@ func mapToOptions(m map[string]any) CsvOptions { } } + if sm, ok := m["string"].(map[string]any); ok { + o.String = &StringOptions{} + if v, ok := sm["quote"].(string); ok { + o.String.Quote = v + } + if v, ok := sm["csv"].(bool); ok { + o.String.Csv = &v + } + } + + if v, ok := m["_stream"].(StreamFunc); ok { + o.Stream = v + } + return o } diff --git a/test/fixtures/trim.json b/test/fixtures/trim.json index 6f68056..f8f2ee7 100644 --- a/test/fixtures/trim.json +++ b/test/fixtures/trim.json @@ -1,4 +1,3 @@ -<<<<<<< HEAD [ { "a": "1", @@ -21,6 +20,3 @@ "c": "66" } ] -======= -[{"a":"hello","b":"world"},{"a":"foo","b":"bar"}] ->>>>>>> 368b77c70d6f777273b572d66ea6ca2070e462ab