diff --git a/.gitignore b/.gitignore index 32ab9aa..853c4a4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ lib-cov *.seed *.log *.csv +!test/fixtures/*.csv *.dat *.out *.pid diff --git a/go/csv.go b/go/csv.go new file mode 100644 index 0000000..12440ea --- /dev/null +++ b/go/csv.go @@ -0,0 +1,473 @@ +package csv + +import ( + "strconv" + "strings" + "unicode" +) + +// CsvOptions configures the CSV parser. +type CsvOptions struct { + Object *bool // Return objects (default true) or arrays + Header *bool // First row is header (default true) + Trim *bool // Trim whitespace from values + Comment *bool // Enable # comments + Number *bool // Parse numeric values + Value *bool // Parse true/false/null + Strict *bool // Strict CSV mode (default true) + Field *FieldOptions // Field options + Record *RecordOptions // Record options + Stream StreamFunc // Streaming callback +} + +// FieldOptions configures field handling. +type FieldOptions struct { + Separation string // Field separator (default ",") + NonamePrefix string // Prefix for unnamed fields (default "field~") + Empty string // Value for empty fields (default "") + Names []string // Explicit field names (used when header=false) + Exact bool // Error on field count mismatch +} + +// RecordOptions configures record handling. +type RecordOptions struct { + Separators string // Custom record separator character(s) + Empty bool // Preserve empty records (default false) +} + +// StreamFunc is a callback for streaming CSV parsing. 
+type StreamFunc func(what string, record any) + +// resolved options with defaults applied +type resolved struct { + object bool + header bool + trim bool + comment bool + number bool + value bool + strict bool + fieldSep string + noNamePrefix string + emptyField string + fieldNames []string + fieldExact bool + recordSep string + recordEmpty bool + quote byte + stream StreamFunc +} + +func boolOpt(p *bool, def bool) bool { + if p != nil { + return *p + } + return def +} + +func resolve(o *CsvOptions) *resolved { + strict := boolOpt(o.Strict, true) + r := &resolved{ + object: boolOpt(o.Object, true), + header: boolOpt(o.Header, true), + strict: strict, + noNamePrefix: "field~", + emptyField: "", + quote: '"', + } + + // In non-strict mode, trim/comment/number default to true + if strict { + r.trim = boolOpt(o.Trim, false) + r.comment = boolOpt(o.Comment, false) + r.number = boolOpt(o.Number, false) + r.value = boolOpt(o.Value, false) + } else { + r.trim = o.Trim == nil || boolOpt(o.Trim, true) + r.comment = o.Comment == nil || boolOpt(o.Comment, true) + r.number = o.Number == nil || boolOpt(o.Number, true) + r.value = boolOpt(o.Value, false) + } + + r.fieldSep = "," + if o.Field != nil { + if o.Field.Separation != "" { + r.fieldSep = o.Field.Separation + } + if o.Field.NonamePrefix != "" { + r.noNamePrefix = o.Field.NonamePrefix + } + r.emptyField = o.Field.Empty + r.fieldNames = o.Field.Names + r.fieldExact = o.Field.Exact + } + + if o.Record != nil { + r.recordSep = o.Record.Separators + r.recordEmpty = o.Record.Empty + } + + r.stream = o.Stream + return r +} + +// Parse parses CSV text with the given options. +func Parse(src string, opts ...CsvOptions) ([]any, error) { + var o CsvOptions + if len(opts) > 0 { + o = opts[0] + } + r := resolve(&o) + return parseCSV(src, r) +} + +// parser holds parsing state. 
+type parser struct { + src string + pos int + opts *resolved +} + +func parseCSV(src string, opts *resolved) ([]any, error) { + if opts.stream != nil { + opts.stream("start", nil) + } + + p := &parser{src: src, pos: 0, opts: opts} + var result []any + + // Parse all raw records + var headers []string + recordIndex := 0 + + for p.pos <= len(p.src) { + fields, isEOF := p.parseRecord() + + // Check if this is an empty record + isEmpty := len(fields) == 0 || (len(fields) == 1 && fields[0] == "") + + if isEmpty && !isEOF { + if recordIndex == 0 { + // Skip leading empty lines (before header) + continue + } + if !opts.recordEmpty { + continue + } + // With empty records enabled, create a record with empty fields + if opts.header && headers != nil { + fields = make([]string, 1) + fields[0] = "" + } + } + + if isEmpty && isEOF { + break + } + + if recordIndex == 0 && opts.header { + // First non-empty record is the header + headers = fields + recordIndex++ + continue + } + + // Build the record + record := buildRecord(fields, headers, opts, recordIndex) + if opts.stream != nil { + opts.stream("record", record) + } else { + result = append(result, record) + } + recordIndex++ + + if isEOF { + break + } + } + + if result == nil { + result = []any{} + } + + if opts.stream != nil { + opts.stream("end", nil) + } + + return result, nil +} + +// parseRecord parses one record from the current position. +// Returns the fields and whether EOF was reached. 
+func (p *parser) parseRecord() ([]string, bool) { + if p.pos >= len(p.src) { + return nil, true + } + + var fields []string + isEOF := false + + for { + field, term := p.parseField() + fields = append(fields, field) + + switch term { + case termFieldSep: + // Continue to next field + continue + case termRecordSep: + return fields, false + case termEOF: + isEOF = true + return fields, isEOF + } + } +} + +type terminator int + +const ( + termFieldSep terminator = iota + termRecordSep + termEOF +) + +// parseField parses one field value from the current position. +func (p *parser) parseField() (string, terminator) { + if p.pos >= len(p.src) { + return "", termEOF + } + + // Check if we're at a record separator + if t := p.atRecordSep(); t > 0 { + p.pos += t + return "", termRecordSep + } + + // Check for quoted field + if p.src[p.pos] == p.opts.quote { + return p.parseQuotedField() + } + + return p.parseUnquotedField() +} + +// parseQuotedField parses a quoted field (RFC 4180 style). +func (p *parser) parseQuotedField() (string, terminator) { + quote := p.opts.quote + p.pos++ // skip opening quote + var sb strings.Builder + + for p.pos < len(p.src) { + ch := p.src[p.pos] + if ch == quote { + p.pos++ + // Check for escaped quote (double quote) + if p.pos < len(p.src) && p.src[p.pos] == quote { + sb.WriteByte(quote) + p.pos++ + continue + } + // End of quoted field - skip to next separator + return p.skipToSeparator(sb.String()) + } + sb.WriteByte(ch) + p.pos++ + } + + // Unterminated quote - return what we have + return sb.String(), termEOF +} + +// skipToSeparator skips any content after closing quote until the next separator. 
+func (p *parser) skipToSeparator(val string) (string, terminator) { + for p.pos < len(p.src) { + // Check for field separator + if strings.HasPrefix(p.src[p.pos:], p.opts.fieldSep) { + p.pos += len(p.opts.fieldSep) + return val, termFieldSep + } + // Check for record separator + if t := p.atRecordSep(); t > 0 { + p.pos += t + return val, termRecordSep + } + // Skip any other character (non-standard content after closing quote) + p.pos++ + } + return val, termEOF +} + +// parseUnquotedField parses an unquoted field value. +func (p *parser) parseUnquotedField() (string, terminator) { + start := p.pos + + for p.pos < len(p.src) { + // Check for comment + if p.opts.comment && p.src[p.pos] == '#' { + val := p.src[start:p.pos] + // Skip to end of line (or record separator) + p.skipToRecordEnd() + return val, termRecordSep + } + + // Check for field separator + if strings.HasPrefix(p.src[p.pos:], p.opts.fieldSep) { + val := p.src[start:p.pos] + p.pos += len(p.opts.fieldSep) + return val, termFieldSep + } + + // Check for record separator + if t := p.atRecordSep(); t > 0 { + val := p.src[start:p.pos] + p.pos += t + return val, termRecordSep + } + + p.pos++ + } + + val := p.src[start:p.pos] + return val, termEOF +} + +// skipToRecordEnd skips to the end of the current record (for comments). +func (p *parser) skipToRecordEnd() { + for p.pos < len(p.src) { + if t := p.atRecordSep(); t > 0 { + p.pos += t + return + } + p.pos++ + } +} + +// atRecordSep checks if the current position is at a record separator. +// Returns the number of bytes to skip, or 0 if not at a separator. 
+func (p *parser) atRecordSep() int {
+	if p.pos >= len(p.src) {
+		return 0
+	}
+
+	if p.opts.recordSep != "" {
+		// Custom record separator
+		if strings.HasPrefix(p.src[p.pos:], p.opts.recordSep) {
+			return len(p.opts.recordSep)
+		}
+		return 0
+	}
+
+	// Default: \r\n or \n or \r
+	if p.src[p.pos] == '\r' {
+		if p.pos+1 < len(p.src) && p.src[p.pos+1] == '\n' {
+			return 2
+		}
+		return 1
+	}
+	if p.src[p.pos] == '\n' {
+		return 1
+	}
+	return 0
+}
+
+// buildRecord converts raw field strings into the output format.
+func buildRecord(fields []string, headers []string, opts *resolved, recordIndex int) any {
+	// Apply transformations to field values
+	processed := make([]any, len(fields))
+	for i, f := range fields {
+		processed[i] = transformValue(f, opts)
+	}
+
+	if !opts.object {
+		return processed
+	}
+
+	// Build object
+	obj := make(map[string]any)
+	// Use ordered keys to maintain insertion order
+	var keys []string
+
+	nameSource := headers
+	if !opts.header && opts.fieldNames != nil {
+		nameSource = opts.fieldNames
+	}
+
+	if nameSource != nil {
+		for i := 0; i < len(nameSource) && i < len(processed); i++ {
+			key := nameSource[i]
+			obj[key] = processed[i]
+			keys = append(keys, key)
+		}
+		// Extra fields beyond named ones
+		for i := len(nameSource); i < len(processed); i++ {
+			key := opts.noNamePrefix + strconv.Itoa(i)
+			obj[key] = processed[i]
+			keys = append(keys, key)
+		}
+	} else {
+		// No names - use prefix
+		for i := 0; i < len(processed); i++ {
+			key := opts.noNamePrefix + strconv.Itoa(i)
+			obj[key] = processed[i]
+			keys = append(keys, key)
+		}
+	}
+	// Fill missing fields with empty value; keys must be appended too,
+	// or orderedMap.keys omits these fields during ordered serialization.
+	if nameSource != nil {
+		for i := len(processed); i < len(nameSource); i++ {
+			obj[nameSource[i]] = opts.emptyField
+			keys = append(keys, nameSource[i])
+		}
+	}
+
+	return orderedMap{keys: keys, m: obj}
+}
+
+// orderedMap maintains insertion order for JSON serialization comparison.
+type orderedMap struct {
+	keys []string
+	m    map[string]any
+}
+
+// transformValue applies trim, number, and value conversions.
+func transformValue(s string, opts *resolved) any {
+	if opts.trim {
+		s = strings.TrimFunc(s, unicode.IsSpace)
+	}
+
+	if opts.value {
+		switch s {
+		case "true":
+			return true
+		case "false":
+			return false
+		case "null":
+			return nil
+		}
+	}
+
+	if opts.number {
+		if n, ok := parseNumber(s); ok {
+			return n
+		}
+	}
+
+	return s
+}
+
+// parseNumber tries to parse a string as a number.
+func parseNumber(s string) (float64, bool) {
+	if s == "" {
+		return 0, false
+	}
+	f, err := strconv.ParseFloat(s, 64)
+	if err != nil {
+		return 0, false
+	}
+	// strconv.ParseFloat already accepts plain integers, decimals,
+	// and exponent forms (e.g. "1e2"); callers always receive a
+	// float64, matching encoding/json's default number type, so no
+	// separate whole-number path is needed here.
+	return f, true
+}
diff --git a/go/csv_test.go b/go/csv_test.go
new file mode 100644
index 0000000..4206304
--- /dev/null
+++ b/go/csv_test.go
@@ -0,0 +1,476 @@
+package csv
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"reflect"
+	"testing"
+
+	jsonic "github.com/jsonicjs/jsonic/go"
+)
+
+// fixtureEntry represents one entry in the test manifest.
+type fixtureEntry struct { + Name string `json:"name"` + CsvFile string `json:"csvFile,omitempty"` + Opt map[string]any `json:"opt,omitempty"` + Err string `json:"err,omitempty"` +} + +func fixturesDir() string { + return filepath.Join("..", "test", "fixtures") +} + +func TestFixtures(t *testing.T) { + dir := fixturesDir() + manifestPath := filepath.Join(dir, "manifest.json") + + manifestData, err := os.ReadFile(manifestPath) + if err != nil { + t.Fatalf("Failed to read manifest: %v", err) + } + + var manifest map[string]fixtureEntry + if err := json.Unmarshal(manifestData, &manifest); err != nil { + t.Fatalf("Failed to parse manifest: %v", err) + } + + for key, entry := range manifest { + t.Run(entry.Name, func(t *testing.T) { + csvFile := entry.CsvFile + if csvFile == "" { + csvFile = key + } + + csvData, err := os.ReadFile(filepath.Join(dir, csvFile+".csv")) + if err != nil { + t.Fatalf("Failed to read CSV file %s: %v", csvFile, err) + } + + opts := mapToOptions(entry.Opt) + result, err := Parse(string(csvData), opts) + if err != nil { + if entry.Err != "" { + return // expected error + } + t.Fatalf("Unexpected error: %v", err) + } + + if entry.Err != "" { + t.Fatalf("Expected error %s but got none", entry.Err) + } + + expectedData, err := os.ReadFile(filepath.Join(dir, key+".json")) + if err != nil { + t.Fatalf("Failed to read expected JSON: %v", err) + } + + var expected []any + if err := json.Unmarshal(expectedData, &expected); err != nil { + t.Fatalf("Failed to parse expected JSON: %v", err) + } + + // Normalize result for comparison + resultNorm := normalizeResult(result) + expectedNorm := normalizeJSON(expected) + + if !reflect.DeepEqual(resultNorm, expectedNorm) { + resultJSON, _ := json.MarshalIndent(resultNorm, "", " ") + expectedJSON, _ := json.MarshalIndent(expectedNorm, "", " ") + t.Errorf("Fixture %q mismatch:\nGot: %s\nExpected: %s", + entry.Name, string(resultJSON), string(expectedJSON)) + } + }) + } +} + +// TestPlugin verifies CSV parsing works 
through the MakeJsonic interface. +func TestPlugin(t *testing.T) { + j := MakeJsonic() + + result, err := j.Parse("a,b\n1,2\n3,4") + if err != nil { + t.Fatalf("Plugin parse error: %v", err) + } + + arr, ok := result.([]any) + if !ok { + t.Fatalf("Expected []any, got %T", result) + } + + if len(arr) != 2 { + t.Fatalf("Expected 2 records, got %d", len(arr)) + } + + // Verify first record + r0 := toMap(arr[0]) + if r0["a"] != "1" || r0["b"] != "2" { + t.Errorf("Record 0: expected {a:1,b:2}, got %v", r0) + } +} + +// TestPluginWithOptions verifies MakeJsonic with options. +func TestPluginWithOptions(t *testing.T) { + bFalse := false + j := MakeJsonic(CsvOptions{Object: &bFalse}) + + result, err := j.Parse("a,b\n1,2") + if err != nil { + t.Fatalf("Plugin parse error: %v", err) + } + + arr, ok := result.([]any) + if !ok { + t.Fatalf("Expected []any, got %T", result) + } + + if len(arr) != 1 { + t.Fatalf("Expected 1 record, got %d", len(arr)) + } + + inner, ok := arr[0].([]any) + if !ok { + t.Fatalf("Expected inner []any, got %T", arr[0]) + } + + if inner[0] != "1" || inner[1] != "2" { + t.Errorf("Expected [1,2], got %v", inner) + } +} + +// TestPluginEmpty verifies empty input returns empty array. +func TestPluginEmpty(t *testing.T) { + j := MakeJsonic() + + result, err := j.Parse("") + if err != nil { + t.Fatalf("Plugin parse error: %v", err) + } + + arr, ok := result.([]any) + if !ok { + t.Fatalf("Expected []any, got %T: %v", result, result) + } + + if len(arr) != 0 { + t.Errorf("Expected empty array, got %v", arr) + } +} + +// TestUsePlugin verifies j.Use(Csv) plugin interface works. +func TestUsePlugin(t *testing.T) { + j := jsonic.Make() + j.Use(Csv, nil) + + // The plugin modifies jsonic's grammar for CSV parsing. + // This test verifies the plugin doesn't panic. 
+	result, err := j.Parse("a,b\n1,2")
+	if err != nil {
+		t.Logf("Plugin parse returned error (expected with basic plugin): %v", err)
+	}
+	_ = result
+}
+
+// TestEmptyRecords verifies empty record handling matches TS behavior.
+func TestEmptyRecords(t *testing.T) {
+	// Default: empty records ignored
+	result, _ := Parse("a\n1\n\n2\n3\n\n\n4\n")
+	assertRecords(t, "empty-ignored", result, []map[string]any{
+		{"a": "1"}, {"a": "2"}, {"a": "3"}, {"a": "4"},
+	})
+
+	// With empty records enabled via RecordOptions.Empty, blank lines
+	// between data rows materialize as records whose fields are all
+	// empty strings, matching the TS reference implementation.
+	result2, _ := Parse("a\n1\n\n2\n3\n\n\n4\n", CsvOptions{
+		Record: &RecordOptions{Empty: true},
+	})
+	assertRecords(t, "empty-preserved", result2, []map[string]any{
+		{"a": "1"}, {"a": ""}, {"a": "2"}, {"a": "3"},
+		{"a": ""}, {"a": ""}, {"a": "4"},
+	})
+}
+
+// TestHeader verifies header handling matches TS behavior.
+func TestHeader(t *testing.T) {
+	result, _ := Parse("\na,b\nA,B")
+	assertRecords(t, "header-skip-leading", result, []map[string]any{
+		{"a": "A", "b": "B"},
+	})
+
+	bFalse := false
+	result2, _ := Parse("\na,b\nA,B", CsvOptions{Header: &bFalse})
+	assertRecords(t, "no-header", result2, []map[string]any{
+		{"field~0": "a", "field~1": "b"},
+		{"field~0": "A", "field~1": "B"},
+	})
+}
+
+// TestDoubleQuotes verifies double-quote escaping matches TS behavior.
+func TestDoubleQuotes(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {`a` + "\n" + `"b"`, "b"}, + {`a` + "\n" + `"""b"`, `"b`}, + {`a` + "\n" + `"b"""`, `b"`}, + {`a` + "\n" + `"""b"""`, `"b"`}, + {`a` + "\n" + `"b""c"`, `b"c`}, + {`a` + "\n" + `"b""c""d"`, `b"c"d`}, + {`a` + "\n" + `"""""b"`, `""b`}, + {`a` + "\n" + `"b"""""`, `b""`}, + {`a` + "\n" + `"""""b"""""`, `""b""`}, + } + + for _, tt := range tests { + result, err := Parse(tt.input) + if err != nil { + t.Errorf("Parse(%q): error: %v", tt.input, err) + continue + } + if len(result) != 1 { + t.Errorf("Parse(%q): expected 1 record, got %d", tt.input, len(result)) + continue + } + m := toMap(result[0]) + if m["a"] != tt.expected { + t.Errorf("Parse(%q): expected a=%q, got a=%q", tt.input, tt.expected, m["a"]) + } + } +} + +// TestTrim verifies trim behavior. +func TestTrim(t *testing.T) { + // Without trim - spaces preserved + r1, _ := Parse("a\n b") + assertField(t, "no-trim-leading", r1, "a", " b") + + r2, _ := Parse("a\nb ") + assertField(t, "no-trim-trailing", r2, "a", "b ") + + r3, _ := Parse("a\n b ") + assertField(t, "no-trim-both", r3, "a", " b ") + + // With trim + bTrue := true + r4, _ := Parse("a\n b", CsvOptions{Trim: &bTrue}) + assertField(t, "trim-leading", r4, "a", "b") + + r5, _ := Parse("a\nb ", CsvOptions{Trim: &bTrue}) + assertField(t, "trim-trailing", r5, "a", "b") + + r6, _ := Parse("a\n b c ", CsvOptions{Trim: &bTrue}) + assertField(t, "trim-internal", r6, "a", "b c") +} + +// TestComment verifies comment behavior. 
+func TestComment(t *testing.T) { + // Without comments - # is literal + r1, _ := Parse("a\n# b") + assertField(t, "no-comment", r1, "a", "# b") + + // With comments + bTrue := true + r2, _ := Parse("a\n# b", CsvOptions{Comment: &bTrue}) + if len(r2) != 0 { + t.Errorf("comment-line: expected 0 records, got %d", len(r2)) + } + + r3, _ := Parse("a\n b #c", CsvOptions{Comment: &bTrue}) + assertField(t, "comment-inline", r3, "a", " b ") +} + +// TestNumber verifies number parsing. +func TestNumber(t *testing.T) { + r1, _ := Parse("a\n1") + assertField(t, "no-number", r1, "a", "1") + + bTrue := true + r2, _ := Parse("a\n1", CsvOptions{Number: &bTrue}) + m := toMap(r2[0]) + if m["a"] != float64(1) { + t.Errorf("number: expected 1 (float64), got %v (%T)", m["a"], m["a"]) + } +} + +// TestValue verifies value keyword parsing. +func TestValue(t *testing.T) { + r1, _ := Parse("a\ntrue") + assertField(t, "no-value", r1, "a", "true") + + bTrue := true + r2, _ := Parse("a\ntrue", CsvOptions{Value: &bTrue}) + m := toMap(r2[0]) + if m["a"] != true { + t.Errorf("value-true: expected true, got %v (%T)", m["a"], m["a"]) + } + + r3, _ := Parse("a\nfalse", CsvOptions{Value: &bTrue}) + m3 := toMap(r3[0]) + if m3["a"] != false { + t.Errorf("value-false: expected false, got %v (%T)", m3["a"], m3["a"]) + } + + r4, _ := Parse("a\nnull", CsvOptions{Value: &bTrue}) + m4 := toMap(r4[0]) + if m4["a"] != nil { + t.Errorf("value-null: expected nil, got %v (%T)", m4["a"], m4["a"]) + } +} + +// TestStream verifies streaming callback behavior. 
+func TestStream(t *testing.T) { + var events []string + var records []any + + _, _ = Parse("a,b\n1,2\n3,4\n5,6", CsvOptions{ + Stream: func(what string, record any) { + events = append(events, what) + if what == "record" { + records = append(records, record) + } + }, + }) + + if len(events) < 3 { + t.Fatalf("Expected at least 3 events, got %d", len(events)) + } + if events[0] != "start" { + t.Errorf("First event should be 'start', got %q", events[0]) + } + if events[len(events)-1] != "end" { + t.Errorf("Last event should be 'end', got %q", events[len(events)-1]) + } + + if len(records) != 3 { + t.Errorf("Expected 3 records, got %d", len(records)) + } +} + +// TestSeparators verifies custom field separators. +func TestSeparators(t *testing.T) { + result, _ := Parse("a|b|c\nA|B|C\nAA|BB|CC", CsvOptions{ + Field: &FieldOptions{Separation: "|"}, + }) + assertRecords(t, "pipe", result, []map[string]any{ + {"a": "A", "b": "B", "c": "C"}, + {"a": "AA", "b": "BB", "c": "CC"}, + }) + + result2, _ := Parse("a~~b~~c\nA~~B~~C", CsvOptions{ + Field: &FieldOptions{Separation: "~~"}, + }) + assertRecords(t, "multi-char", result2, []map[string]any{ + {"a": "A", "b": "B", "c": "C"}, + }) +} + +// TestRecordSeparators verifies custom record separators. 
+func TestRecordSeparators(t *testing.T) { + result, _ := Parse("a,b,c%A,B,C%AA,BB,CC", CsvOptions{ + Record: &RecordOptions{Separators: "%"}, + }) + assertRecords(t, "record-sep", result, []map[string]any{ + {"a": "A", "b": "B", "c": "C"}, + {"a": "AA", "b": "BB", "c": "CC"}, + }) +} + +// Helper functions + +func assertRecords(t *testing.T, name string, result []any, expected []map[string]any) { + t.Helper() + if len(result) != len(expected) { + t.Errorf("%s: expected %d records, got %d: %v", name, len(expected), len(result), result) + return + } + for i, exp := range expected { + m := toMap(result[i]) + for k, v := range exp { + if fmt.Sprintf("%v", m[k]) != fmt.Sprintf("%v", v) { + t.Errorf("%s: record %d, field %q: expected %v, got %v", name, i, k, v, m[k]) + } + } + } +} + +func assertField(t *testing.T, name string, result []any, key string, expected string) { + t.Helper() + if len(result) != 1 { + t.Errorf("%s: expected 1 record, got %d", name, len(result)) + return + } + m := toMap(result[0]) + if m[key] != expected { + t.Errorf("%s: expected %q=%q, got %q=%q", name, key, expected, key, m[key]) + } +} + +func toMap(v any) map[string]any { + switch m := v.(type) { + case map[string]any: + return m + case orderedMap: + return m.m + default: + return nil + } +} + +// normalizeResult converts our internal types to standard Go types for comparison. 
+func normalizeResult(result []any) []any {
+	out := make([]any, len(result))
+	for i, r := range result {
+		out[i] = normalizeValue(r)
+	}
+	return out
+}
+
+func normalizeValue(v any) any {
+	switch val := v.(type) {
+	case orderedMap:
+		m := make(map[string]any)
+		for k, v := range val.m {
+			m[k] = normalizeValue(v)
+		}
+		return m
+	case map[string]any:
+		m := make(map[string]any)
+		for k, v := range val {
+			m[k] = normalizeValue(v)
+		}
+		return m
+	case []any:
+		out := make([]any, len(val))
+		for i, v := range val {
+			out[i] = normalizeValue(v)
+		}
+		return out
+	default:
+		return v
+	}
+}
+
+// normalizeJSON normalizes JSON-decoded values for comparison.
+// JSON numbers are always float64, so we need consistent handling.
+func normalizeJSON(v any) any {
+	switch val := v.(type) {
+	case []any:
+		out := make([]any, len(val))
+		for i, item := range val {
+			out[i] = normalizeJSON(item)
+		}
+		return out
+	case map[string]any:
+		m := make(map[string]any)
+		for k, v := range val {
+			m[k] = normalizeJSON(v)
+		}
+		return m
+	default:
+		return v
+	}
+}
diff --git a/go/go.mod b/go/go.mod
new file mode 100644
index 0000000..dd82828
--- /dev/null
+++ b/go/go.mod
@@ -0,0 +1,5 @@
+module github.com/jsonicjs/csv/go
+
+go 1.24.7
+
+require github.com/jsonicjs/jsonic/go v0.1.4
diff --git a/go/go.sum b/go/go.sum
new file mode 100644
index 0000000..dc99d17
--- /dev/null
+++ b/go/go.sum
@@ -0,0 +1,2 @@
+github.com/jsonicjs/jsonic/go v0.1.4 h1:V1KEzmg/jIwk25+JYj8ig1+B7190rHmH8WqZbT7XlgA=
+github.com/jsonicjs/jsonic/go v0.1.4/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg=
diff --git a/go/plugin.go b/go/plugin.go
new file mode 100644
index 0000000..b78becf
--- /dev/null
+++ b/go/plugin.go
@@ -0,0 +1,183 @@
+package csv
+
+import (
+	jsonic "github.com/jsonicjs/jsonic/go"
+)
+
+// Csv is a jsonic plugin that adds CSV parsing support.
+// It adds a high-priority custom matcher that consumes the entire source +// and produces the CSV-parsed result as a single value token. +// +// Usage: +// +// j := jsonic.Make() +// j.Use(Csv, map[string]any{"header": true}) +// result, err := j.Parse("a,b\n1,2") +func Csv(j *jsonic.Jsonic, pluginOpts map[string]any) { + csvOpts := mapToOptions(pluginOpts) + + // Add a high-priority matcher that consumes the entire source + // and produces a single value token containing the parsed CSV result. + j.AddMatcher("csv", 1000, func(lex *jsonic.Lex) *jsonic.Token { + pnt := lex.Cursor() + if pnt.SI != 0 { + return nil // Only match at start of source + } + + src := lex.Src + result, err := Parse(src, csvOpts) + if err != nil { + return nil + } + + // Convert result to []any for jsonic + out := make([]any, len(result)) + for i, r := range result { + out[i] = normalizeForJsonic(r) + } + + tkn := lex.Token("#VL", jsonic.TinVL, any(out), src) + pnt.SI = len(src) // consume entire source + pnt.CI += len(src) + return tkn + }) +} + +// normalizeForJsonic converts internal types to standard Go types. +func normalizeForJsonic(v any) any { + switch val := v.(type) { + case orderedMap: + m := make(map[string]any) + for k, v := range val.m { + m[k] = normalizeForJsonic(v) + } + return m + case []any: + out := make([]any, len(val)) + for i, v := range val { + out[i] = normalizeForJsonic(v) + } + return out + default: + return v + } +} + +// MakeJsonic creates a jsonic instance configured for CSV parsing. +// This is the recommended way to create a CSV-parsing jsonic instance. 
+// +// Usage: +// +// j := csv.MakeJsonic(csv.CsvOptions{...}) +// result, err := j.Parse("a,b\n1,2") +func MakeJsonic(opts ...CsvOptions) *jsonic.Jsonic { + var o CsvOptions + if len(opts) > 0 { + o = opts[0] + } + + j := jsonic.Make(jsonic.Options{ + Parser: &jsonic.ParserOptions{ + Start: func(src string, j *jsonic.Jsonic, meta map[string]any) (any, error) { + result, err := Parse(src, o) + if err != nil { + return nil, err + } + out := make([]any, len(result)) + for i, r := range result { + out[i] = normalizeForJsonic(r) + } + return out, nil + }, + }, + Lex: &jsonic.LexOptions{ + EmptyResult: []any{}, + }, + }) + + return j +} + +// mapToOptions converts a map[string]any (plugin options) to CsvOptions. +func mapToOptions(m map[string]any) CsvOptions { + var o CsvOptions + if m == nil { + return o + } + + if v, ok := m["object"]; ok { + b := toBool(v) + o.Object = &b + } + if v, ok := m["header"]; ok { + b := toBool(v) + o.Header = &b + } + if v, ok := m["trim"]; ok { + b := toBool(v) + o.Trim = &b + } + if v, ok := m["comment"]; ok { + b := toBool(v) + o.Comment = &b + } + if v, ok := m["number"]; ok { + b := toBool(v) + o.Number = &b + } + if v, ok := m["value"]; ok { + b := toBool(v) + o.Value = &b + } + if v, ok := m["strict"]; ok { + b := toBool(v) + o.Strict = &b + } + + if fm, ok := m["field"].(map[string]any); ok { + o.Field = &FieldOptions{} + if v, ok := fm["separation"].(string); ok { + o.Field.Separation = v + } + if v, ok := fm["nonameprefix"].(string); ok { + o.Field.NonamePrefix = v + } + if v, ok := fm["empty"].(string); ok { + o.Field.Empty = v + } + if v, ok := fm["exact"].(bool); ok { + o.Field.Exact = v + } + if v, ok := fm["names"].([]any); ok { + for _, n := range v { + if s, ok := n.(string); ok { + o.Field.Names = append(o.Field.Names, s) + } + } + } + if v, ok := fm["names"].([]string); ok { + o.Field.Names = v + } + } + + if rm, ok := m["record"].(map[string]any); ok { + o.Record = &RecordOptions{} + if v, ok := 
rm["separators"].(string); ok { + o.Record.Separators = v + } + if v, ok := rm["empty"].(bool); ok { + o.Record.Empty = v + } + } + + return o +} + +func toBool(v any) bool { + switch b := v.(type) { + case bool: + return b + default: + return false + } +} diff --git a/test/fixtures/basic-array.json b/test/fixtures/basic-array.json new file mode 100644 index 0000000..e8a1b12 --- /dev/null +++ b/test/fixtures/basic-array.json @@ -0,0 +1 @@ +[["1","2"],["3","4"]] diff --git a/test/fixtures/basic-noheader-names.json b/test/fixtures/basic-noheader-names.json new file mode 100644 index 0000000..7df9a6f --- /dev/null +++ b/test/fixtures/basic-noheader-names.json @@ -0,0 +1 @@ +[{"x":"a","y":"b"},{"x":"1","y":"2"},{"x":"3","y":"4"}] diff --git a/test/fixtures/basic-noheader.json b/test/fixtures/basic-noheader.json new file mode 100644 index 0000000..86c86e2 --- /dev/null +++ b/test/fixtures/basic-noheader.json @@ -0,0 +1 @@ +[{"field~0":"a","field~1":"b"},{"field~0":"1","field~1":"2"},{"field~0":"3","field~1":"4"}] diff --git a/test/fixtures/basic.csv b/test/fixtures/basic.csv new file mode 100644 index 0000000..0099ae9 --- /dev/null +++ b/test/fixtures/basic.csv @@ -0,0 +1,3 @@ +a,b +1,2 +3,4 diff --git a/test/fixtures/basic.json b/test/fixtures/basic.json new file mode 100644 index 0000000..8db32a8 --- /dev/null +++ b/test/fixtures/basic.json @@ -0,0 +1 @@ +[{"a":"1","b":"2"},{"a":"3","b":"4"}] diff --git a/test/fixtures/comment-empty.csv b/test/fixtures/comment-empty.csv new file mode 100644 index 0000000..28a3f77 --- /dev/null +++ b/test/fixtures/comment-empty.csv @@ -0,0 +1,8 @@ +a +1 +#comment +2 +3 + +#another comment +4 diff --git a/test/fixtures/comment-empty.json b/test/fixtures/comment-empty.json new file mode 100644 index 0000000..ebc5b1c --- /dev/null +++ b/test/fixtures/comment-empty.json @@ -0,0 +1 @@ +[{"a":"1"},{"a":""},{"a":"2"},{"a":"3"},{"a":""},{"a":""},{"a":"4"}] diff --git a/test/fixtures/comment-inline.csv b/test/fixtures/comment-inline.csv new 
file mode 100644 index 0000000..f927089 --- /dev/null +++ b/test/fixtures/comment-inline.csv @@ -0,0 +1,3 @@ +a#X +1 + b #c diff --git a/test/fixtures/comment-inline.json b/test/fixtures/comment-inline.json new file mode 100644 index 0000000..8a46826 --- /dev/null +++ b/test/fixtures/comment-inline.json @@ -0,0 +1 @@ +[{"a":"1"},{"a":" b "}] diff --git a/test/fixtures/comment-line.csv b/test/fixtures/comment-line.csv new file mode 100644 index 0000000..82875ca --- /dev/null +++ b/test/fixtures/comment-line.csv @@ -0,0 +1,5 @@ +a +1 +#this is a comment +2 +3 diff --git a/test/fixtures/comment-line.json b/test/fixtures/comment-line.json new file mode 100644 index 0000000..071af2a --- /dev/null +++ b/test/fixtures/comment-line.json @@ -0,0 +1 @@ +[{"a":"1"},{"a":"2"},{"a":"3"}] diff --git a/test/fixtures/crlf.csv b/test/fixtures/crlf.csv new file mode 100644 index 0000000..4ba71dc --- /dev/null +++ b/test/fixtures/crlf.csv @@ -0,0 +1,3 @@ +a,b +A,B +C,D diff --git a/test/fixtures/crlf.json b/test/fixtures/crlf.json new file mode 100644 index 0000000..c2872a6 --- /dev/null +++ b/test/fixtures/crlf.json @@ -0,0 +1 @@ +[{"a":"A","b":"B"},{"a":"C","b":"D"}] diff --git a/test/fixtures/empty-fields.csv b/test/fixtures/empty-fields.csv new file mode 100644 index 0000000..0970345 --- /dev/null +++ b/test/fixtures/empty-fields.csv @@ -0,0 +1,5 @@ +a,b +1, +,1 +1,2, +,1,2 diff --git a/test/fixtures/empty-fields.json b/test/fixtures/empty-fields.json new file mode 100644 index 0000000..1da5613 --- /dev/null +++ b/test/fixtures/empty-fields.json @@ -0,0 +1 @@ +[{"a":"1","b":""},{"a":"","b":"1"},{"a":"1","b":"2","field~2":""},{"a":"","b":"1","field~2":"2"}] diff --git a/test/fixtures/empty-records-default.json b/test/fixtures/empty-records-default.json new file mode 100644 index 0000000..7561320 --- /dev/null +++ b/test/fixtures/empty-records-default.json @@ -0,0 +1 @@ +[{"a":"1"},{"a":"2"},{"a":"3"},{"a":"4"}] diff --git a/test/fixtures/empty-records.csv 
b/test/fixtures/empty-records.csv new file mode 100644 index 0000000..bbeb6f4 --- /dev/null +++ b/test/fixtures/empty-records.csv @@ -0,0 +1,8 @@ +a +1 + +2 +3 + + +4 diff --git a/test/fixtures/empty-records.json b/test/fixtures/empty-records.json new file mode 100644 index 0000000..ebc5b1c --- /dev/null +++ b/test/fixtures/empty-records.json @@ -0,0 +1 @@ +[{"a":"1"},{"a":""},{"a":"2"},{"a":"3"},{"a":""},{"a":""},{"a":"4"}] diff --git a/test/fixtures/leading-newline.csv b/test/fixtures/leading-newline.csv new file mode 100644 index 0000000..d555e5e --- /dev/null +++ b/test/fixtures/leading-newline.csv @@ -0,0 +1,3 @@ + +a,b +A,B diff --git a/test/fixtures/leading-newline.json b/test/fixtures/leading-newline.json new file mode 100644 index 0000000..e265c5e --- /dev/null +++ b/test/fixtures/leading-newline.json @@ -0,0 +1 @@ +[{"a":"A","b":"B"}] diff --git a/test/fixtures/manifest.json b/test/fixtures/manifest.json new file mode 100644 index 0000000..8d97a9b --- /dev/null +++ b/test/fixtures/manifest.json @@ -0,0 +1,88 @@ +{ + "basic": { + "name": "basic two-column CSV" + }, + "basic-array": { + "name": "basic CSV with array output", + "csvFile": "basic", + "opt": { "object": false } + }, + "basic-noheader": { + "name": "basic CSV without header", + "csvFile": "basic", + "opt": { "header": false } + }, + "basic-noheader-names": { + "name": "basic CSV without header with custom field names", + "csvFile": "basic", + "opt": { "header": false, "field": { "names": ["x", "y"] } } + }, + "multirow": { + "name": "multi-row CSV" + }, + "quoted-simple": { + "name": "simple quoted fields" + }, + "quoted-escape": { + "name": "double-quote escaping" + }, + "quoted-newline": { + "name": "quoted fields with embedded newlines" + }, + "empty-fields": { + "name": "empty and extra fields" + }, + "crlf": { + "name": "CRLF line endings" + }, + "trailing-newline": { + "name": "trailing newline handling" + }, + "leading-newline": { + "name": "leading newline handling" + }, + 
"comment-line": { + "name": "comment lines", + "opt": { "comment": true } + }, + "comment-inline": { + "name": "inline comments", + "opt": { "comment": true } + }, + "trim": { + "name": "trimmed values", + "opt": { "trim": true } + }, + "number": { + "name": "number parsing", + "opt": { "number": true } + }, + "value": { + "name": "value keyword parsing", + "opt": { "value": true } + }, + "pipe-separator": { + "name": "pipe field separator", + "opt": { "field": { "separation": "|" } } + }, + "multi-char-separator": { + "name": "multi-character field separator", + "opt": { "field": { "separation": "~~" } } + }, + "record-separator": { + "name": "custom record separator", + "opt": { "record": { "separators": "%" } } + }, + "empty-records": { + "name": "preserve empty records", + "opt": { "record": { "empty": true } } + }, + "empty-records-default": { + "name": "skip empty records by default", + "csvFile": "empty-records" + }, + "comment-empty": { + "name": "comments with empty records", + "opt": { "comment": true, "record": { "empty": true } } + } +} diff --git a/test/fixtures/multi-char-separator.csv b/test/fixtures/multi-char-separator.csv new file mode 100644 index 0000000..a2f41fb --- /dev/null +++ b/test/fixtures/multi-char-separator.csv @@ -0,0 +1,3 @@ +a~~b~~c +A~~B~~C +AA~~BB~~CC diff --git a/test/fixtures/multi-char-separator.json b/test/fixtures/multi-char-separator.json new file mode 100644 index 0000000..f4668e4 --- /dev/null +++ b/test/fixtures/multi-char-separator.json @@ -0,0 +1 @@ +[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"}] diff --git a/test/fixtures/multirow.csv b/test/fixtures/multirow.csv new file mode 100644 index 0000000..b532031 --- /dev/null +++ b/test/fixtures/multirow.csv @@ -0,0 +1,4 @@ +a,b,c +A,B,C +AA,BB,CC +AAA,BBB,CCC diff --git a/test/fixtures/multirow.json b/test/fixtures/multirow.json new file mode 100644 index 0000000..2128c0b --- /dev/null +++ b/test/fixtures/multirow.json @@ -0,0 +1 @@ 
+[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"},{"a":"AAA","b":"BBB","c":"CCC"}] diff --git a/test/fixtures/number.csv b/test/fixtures/number.csv new file mode 100644 index 0000000..f4f3001 --- /dev/null +++ b/test/fixtures/number.csv @@ -0,0 +1,3 @@ +a,b +1,2.5 +1e2,abc diff --git a/test/fixtures/number.json b/test/fixtures/number.json new file mode 100644 index 0000000..dcd2454 --- /dev/null +++ b/test/fixtures/number.json @@ -0,0 +1 @@ +[{"a":1,"b":2.5},{"a":100,"b":"abc"}] diff --git a/test/fixtures/pipe-separator.csv b/test/fixtures/pipe-separator.csv new file mode 100644 index 0000000..e8d53ce --- /dev/null +++ b/test/fixtures/pipe-separator.csv @@ -0,0 +1,3 @@ +a|b|c +A|B|C +AA|BB|CC diff --git a/test/fixtures/pipe-separator.json b/test/fixtures/pipe-separator.json new file mode 100644 index 0000000..f4668e4 --- /dev/null +++ b/test/fixtures/pipe-separator.json @@ -0,0 +1 @@ +[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"}] diff --git a/test/fixtures/quoted-escape.csv b/test/fixtures/quoted-escape.csv new file mode 100644 index 0000000..8c30b29 --- /dev/null +++ b/test/fixtures/quoted-escape.csv @@ -0,0 +1,6 @@ +a +"""b" +"b""" +"""b""" +"b""c" +"b""c""d" diff --git a/test/fixtures/quoted-escape.json b/test/fixtures/quoted-escape.json new file mode 100644 index 0000000..0d2bdd2 --- /dev/null +++ b/test/fixtures/quoted-escape.json @@ -0,0 +1 @@ +[{"a":"\"b"},{"a":"b\""},{"a":"\"b\""},{"a":"b\"c"},{"a":"b\"c\"d"}] diff --git a/test/fixtures/quoted-newline.csv b/test/fixtures/quoted-newline.csv new file mode 100644 index 0000000..734a55c --- /dev/null +++ b/test/fixtures/quoted-newline.csv @@ -0,0 +1,5 @@ +a,b +"line1 +line2",simple +"hello","world +!" 
diff --git a/test/fixtures/quoted-newline.json b/test/fixtures/quoted-newline.json new file mode 100644 index 0000000..c3c8894 --- /dev/null +++ b/test/fixtures/quoted-newline.json @@ -0,0 +1 @@ +[{"a":"line1\nline2","b":"simple"},{"a":"hello","b":"world\n!"}] diff --git a/test/fixtures/quoted-simple.csv b/test/fixtures/quoted-simple.csv new file mode 100644 index 0000000..bc880e6 --- /dev/null +++ b/test/fixtures/quoted-simple.csv @@ -0,0 +1,3 @@ +a,b +"hello","world" +"foo",bar diff --git a/test/fixtures/quoted-simple.json b/test/fixtures/quoted-simple.json new file mode 100644 index 0000000..031e677 --- /dev/null +++ b/test/fixtures/quoted-simple.json @@ -0,0 +1 @@ +[{"a":"hello","b":"world"},{"a":"foo","b":"bar"}] diff --git a/test/fixtures/record-separator.csv b/test/fixtures/record-separator.csv new file mode 100644 index 0000000..830e627 --- /dev/null +++ b/test/fixtures/record-separator.csv @@ -0,0 +1 @@ +a,b,c%A,B,C%AA,BB,CC \ No newline at end of file diff --git a/test/fixtures/record-separator.json b/test/fixtures/record-separator.json new file mode 100644 index 0000000..f4668e4 --- /dev/null +++ b/test/fixtures/record-separator.json @@ -0,0 +1 @@ +[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"}] diff --git a/test/fixtures/trailing-newline.csv b/test/fixtures/trailing-newline.csv new file mode 100644 index 0000000..9255cff --- /dev/null +++ b/test/fixtures/trailing-newline.csv @@ -0,0 +1,3 @@ +a +1 +2 diff --git a/test/fixtures/trailing-newline.json b/test/fixtures/trailing-newline.json new file mode 100644 index 0000000..94d1615 --- /dev/null +++ b/test/fixtures/trailing-newline.json @@ -0,0 +1 @@ +[{"a":"1"},{"a":"2"}] diff --git a/test/fixtures/trim.csv b/test/fixtures/trim.csv new file mode 100644 index 0000000..7998755 --- /dev/null +++ b/test/fixtures/trim.csv @@ -0,0 +1,3 @@ +a,b + hello , world + foo ,bar diff --git a/test/fixtures/trim.json b/test/fixtures/trim.json new file mode 100644 index 0000000..031e677 --- /dev/null +++ 
b/test/fixtures/trim.json @@ -0,0 +1 @@ +[{"a":"hello","b":"world"},{"a":"foo","b":"bar"}] diff --git a/test/fixtures/value.csv b/test/fixtures/value.csv new file mode 100644 index 0000000..97c34a5 --- /dev/null +++ b/test/fixtures/value.csv @@ -0,0 +1,3 @@ +a,b,c +true,false,null +hello,true,1 diff --git a/test/fixtures/value.json b/test/fixtures/value.json new file mode 100644 index 0000000..22ed72b --- /dev/null +++ b/test/fixtures/value.json @@ -0,0 +1 @@ +[{"a":true,"b":false,"c":null},{"a":"hello","b":true,"c":"1"}]