From 4c727c77179cade36c4364d017b802e5f824149a Mon Sep 17 00:00:00 2001 From: Matthew Nibecker Date: Tue, 2 Jun 2026 14:24:45 -0700 Subject: [PATCH] Auto-defuse SUP output This PR adds functionality to auto-defuse fusion values in SUP output. If the -fusion flag is set, fusion values are displayed instead of being defused. --- book/src/super-sql/operators/fuse.md | 6 +++--- cli/outputflags/flags.go | 1 + db/ztests/meta.yaml | 2 +- mdtest/mdtest.go | 4 ++++ mdtest/test.go | 8 ++++++-- runtime/sam/expr/function/parse.go | 16 ++++------------ runtime/ztests/expr/function/upcast.yaml | 2 ++ runtime/ztests/expr/fusion-all.yaml | 2 ++ runtime/ztests/expr/search-glob.yaml | 2 +- .../ztests/expr/search-nested-field-regexp.yaml | 2 +- runtime/ztests/expr/search-nested-field.yaml | 4 ++-- runtime/ztests/expr/search-primitives.yaml | 2 +- runtime/ztests/op/fuse.yaml | 16 ++++++++++++++++ sio/supio/writer.go | 12 ++++++++++++ 14 files changed, 56 insertions(+), 23 deletions(-) diff --git a/book/src/super-sql/operators/fuse.md b/book/src/super-sql/operators/fuse.md index eddd4016f7..72e65b29fe 100644 --- a/book/src/super-sql/operators/fuse.md +++ b/book/src/super-sql/operators/fuse.md @@ -29,7 +29,7 @@ Because all values of the input must be read to compute the fused type, --- _Fuse two records_ -```mdtest-spq +```mdtest-spq fusion # spq fuse # input @@ -43,7 +43,7 @@ fusion({a?:_::int64,b?:2},<{b:int64}>) --- _Fuse records with type variation_ -```mdtest-spq +```mdtest-spq fusion # spq fuse # input @@ -57,7 +57,7 @@ fuse --- _Fuse records with complex type variation_ -```mdtest-spq {data-layout="stacked"} +```mdtest-spq fusion {data-layout="stacked"} # spq fuse # input diff --git a/cli/outputflags/flags.go b/cli/outputflags/flags.go index 0beeeddcf7..86ba0b8bc7 100644 --- a/cli/outputflags/flags.go +++ b/cli/outputflags/flags.go @@ -73,6 +73,7 @@ func (f *Flags) SetFormatFlags(fs *flag.FlagSet) { fs.BoolVar(&f.forceBinary, "B", false, "allow Super Binary to be sent to a terminal output") 
fs.BoolVar(&f.jsonPretty, "J", false, "use formatted JSON output independent of -f option") fs.BoolVar(&f.jsonShortcut, "j", false, "use line-oriented JSON output independent of -f option") + fs.BoolVar(&f.SUP.Fusion, "fusion", false, "display fusion values (fusion values are otherwise auto-defused)") fs.BoolVar(&f.supPretty, "S", false, "use formatted Super JSON output independent of -f option") fs.BoolVar(&f.supShortcut, "s", false, "use line-oriented Super JSON output independent of -f option") } diff --git a/db/ztests/meta.yaml b/db/ztests/meta.yaml index f861036dbb..6bfdc9c0a0 100644 --- a/db/ztests/meta.yaml +++ b/db/ztests/meta.yaml @@ -7,7 +7,7 @@ script: | super db load -q -use poolB b.sup super db -S -c 'from :pools | drop id | sort name | drop ts' echo === - super db -S -c 'from poolA@main:objects | {nameof:nameof(this),...this} | drop id' + super db -fusion -S -c 'from poolA@main:objects | {nameof:nameof(this),...this} | drop id' super db -S -c 'from poolA:log | cut nameof(this)' inputs: diff --git a/mdtest/mdtest.go b/mdtest/mdtest.go index 037d61d493..4cb3b73d4d 100644 --- a/mdtest/mdtest.go +++ b/mdtest/mdtest.go @@ -233,11 +233,14 @@ func parseMarkdown(source []byte) (map[string]string, []*Test, error) { }) case "mdtest-spq": var fails bool + var fusion bool var runtime string for _, word := range fcbInfoWords(fcb, source)[1:] { switch { case word == "fails": fails = true + case word == "fusion": + fusion = true case strings.HasPrefix(word, "runtime="): runtime = strings.TrimPrefix(word, "runtime=") if runtime != "vam" && runtime != "sam" { @@ -263,6 +266,7 @@ func parseMarkdown(source []byte) (map[string]string, []*Test, error) { Input: sections[2], SPQ: sections[1], Runtime: runtime, + Fusion: fusion, }) } return ast.WalkContinue, nil diff --git a/mdtest/test.go b/mdtest/test.go index 487b25f890..30ed2b2dfc 100644 --- a/mdtest/test.go +++ b/mdtest/test.go @@ -23,8 +23,9 @@ type Test struct { Runtime string // "sam", "vam", or "" for both // For 
SPQ tests - Input string - SPQ string + Input string + SPQ string + Fusion bool // If true do not auto-defuse output } // Run runs the test, returning nil on success. @@ -52,6 +53,9 @@ func (t *Test) run(runtime string) error { var c *exec.Cmd if t.SPQ != "" { c = exec.Command("super", "-s", "-c", t.SPQ) + if t.Fusion { + c.Args = append(c.Args, "-fusion") + } if s := t.Input; strings.TrimSpace(s) != "" { c.Args = append(c.Args, "-") c.Stdin = strings.NewReader(s) diff --git a/runtime/sam/expr/function/parse.go b/runtime/sam/expr/function/parse.go index 3298644ef0..e4f4cf0019 100644 --- a/runtime/sam/expr/function/parse.go +++ b/runtime/sam/expr/function/parse.go @@ -4,10 +4,9 @@ import ( "fmt" "net/url" "strconv" - "strings" "github.com/brimdata/super" - "github.com/brimdata/super/sio/supio" + "github.com/brimdata/super/pkg/byteconv" "github.com/brimdata/super/sup" ) @@ -86,13 +85,10 @@ func (p *ParseURI) Call(args []super.Value) super.Value { type ParseSUP struct { sctx *super.Context - sr *strings.Reader - zr *supio.Reader } func newParseSUP(sctx *super.Context) *ParseSUP { - var sr strings.Reader - return &ParseSUP{sctx, &sr, supio.NewReader(sctx, &sr)} + return &ParseSUP{sctx} } func (p *ParseSUP) Call(args []super.Value) super.Value { @@ -103,13 +99,9 @@ func (p *ParseSUP) Call(args []super.Value) super.Value { if !in.IsString() { return p.sctx.WrapError("parse_sup: string arg required", args[0]) } - p.sr.Reset(super.DecodeString(in.Bytes())) - val, err := p.zr.Read() + val, err := sup.ParseValue(p.sctx, byteconv.UnsafeString(in.Bytes())) if err != nil { return p.sctx.WrapError("parse_sup: "+err.Error(), args[0]) } - if val == nil { - return super.Null - } - return *val + return val } diff --git a/runtime/ztests/expr/function/upcast.yaml b/runtime/ztests/expr/function/upcast.yaml index 7848eb4a28..bc43550cd8 100644 --- a/runtime/ztests/expr/function/upcast.yaml +++ b/runtime/ztests/expr/function/upcast.yaml @@ -59,6 +59,8 @@ input: | type n101=int64 
[1::n101,] +output-flags: -fusion + output: | error({message:"upcast: value not a subtype of [int8|string]",on:[1,"a"]}) [1::int8,"a"] diff --git a/runtime/ztests/expr/fusion-all.yaml b/runtime/ztests/expr/fusion-all.yaml index e44435bd66..ed455b250f 100644 --- a/runtime/ztests/expr/fusion-all.yaml +++ b/runtime/ztests/expr/fusion-all.yaml @@ -9,6 +9,8 @@ input: | {x:1} +output-flags: -fusion + output: | fusion(0x02::all,) fusion(0x666f6f::all,) diff --git a/runtime/ztests/expr/search-glob.yaml b/runtime/ztests/expr/search-glob.yaml index 1a3a4708a2..61932a5eca 100644 --- a/runtime/ztests/expr/search-glob.yaml +++ b/runtime/ztests/expr/search-glob.yaml @@ -11,4 +11,4 @@ output: | {a:"foox",b:"there"} {a:"hello",b:"foox"} {a:"",b:"foo"} - fusion({a?:_::string,b?:"fool"},<{b:string}>) + {b:"fool"} diff --git a/runtime/ztests/expr/search-nested-field-regexp.yaml b/runtime/ztests/expr/search-nested-field-regexp.yaml index 830cc50c61..b5ac239f62 100644 --- a/runtime/ztests/expr/search-nested-field-regexp.yaml +++ b/runtime/ztests/expr/search-nested-field-regexp.yaml @@ -12,5 +12,5 @@ input: | output: | {a:[{bar:"foo"}]} {a:[{car:"foo"}]} - fusion({car:"foo"}::(null|{car:string}),<{car:string}>) + {car:"foo"} {a:[]::[{bar:null}]} diff --git a/runtime/ztests/expr/search-nested-field.yaml b/runtime/ztests/expr/search-nested-field.yaml index 9a4823b6b8..1059b93128 100644 --- a/runtime/ztests/expr/search-nested-field.yaml +++ b/runtime/ztests/expr/search-nested-field.yaml @@ -13,6 +13,6 @@ input: | output: | {a:[{b:"foo"}]} {a:[{c:"foo"},{b:1}]} - fusion({a:1,b?:_::string},<{a:int64}>) - fusion({a:2,b?:"foo"},<{a:int64,b:string}>) + {a:1} + {a:2,b:"foo"} {a:[]::[{b:null}]} diff --git a/runtime/ztests/expr/search-primitives.yaml b/runtime/ztests/expr/search-primitives.yaml index 13e11c3a95..2095ef90fa 100644 --- a/runtime/ztests/expr/search-primitives.yaml +++ b/runtime/ztests/expr/search-primitives.yaml @@ -9,4 +9,4 @@ input: | output: | "foo" "foo" - 
fusion("foo"::(int64|string),) + "foo" diff --git a/runtime/ztests/op/fuse.yaml b/runtime/ztests/op/fuse.yaml index 67884b977e..d62a6731ef 100644 --- a/runtime/ztests/op/fuse.yaml +++ b/runtime/ztests/op/fuse.yaml @@ -6,6 +6,8 @@ input: | {a:"goodnight",b:123::int32} {a:null,b:null,c:null} +output-flags: -fusion + output: | fusion({a:fusion("hello"::(string|null|none),),b:fusion("world"::(int32|string|null),),c:fusion(_::(string|null|none),)},<{a:string,b:string}>) fusion({a:fusion(_::(string|null|none),),b:fusion("goodnight"::(int32|string|null),),c:fusion("gracie"::(string|null|none),)},<{b:string,c:string}>) @@ -24,6 +26,8 @@ input: | [{a:3,b:3}] [{a:null,b:null}] +output-flags: -fusion + output: | fusion([fusion({a:fusion(1::(int64|null|none),),b:fusion(_::(int64|null|none),)},<{a:int64}>)],<[{a:int64}]>) fusion([fusion({a:fusion(_::(int64|null|none),),b:fusion(2::(int64|null|none),)},<{b:int64}>)],<[{b:int64}]>) @@ -44,6 +48,8 @@ input: | [1] ["s"] +output-flags: -fusion + output: | fusion({a:fusion(1::(int64|string),)}::(int64|string|{a:fusion(int64|string)}|[fusion(int64|string)]),<{a:int64}>) fusion({a:fusion("s"::(int64|string),)}::(int64|string|{a:fusion(int64|string)}|[fusion(int64|string)]),<{a:string}>) @@ -62,6 +68,8 @@ input: | {r:{y:4::int32,z:5::int32},s:"world",r2:{x:6::int32}} {a:null,r:{x:null,y:null,z:null},s:null,r2:null} +output-flags: -fusion + output: | fusion({a:fusion("hello"::(string|null|none),),r:fusion({x:fusion(1::int32::(int32|null|none),),y:fusion(2::int32::(int32|null),),z:fusion(_::(int32|null|none),)},<{x:int32,y:int32}>),s:fusion(_::(string|null|none),),r2:fusion(_::(null|none|{x:int32}),)},<{a:string,r:{x:int32,y:int32}}>) 
fusion({a:fusion(_::(string|null|none),),r:fusion({x:fusion(_::(int32|null|none),),y:fusion(4::int32::(int32|null),),z:fusion(5::int32::(int32|null|none),)},<{y:int32,z:int32}>),s:fusion("world"::(string|null|none),),r2:fusion({x:6::int32}::(null|none|{x:int32}),<{x:int32}>)},<{r:{y:int32,z:int32},s:string,r2:{x:int32}}>) @@ -75,6 +83,8 @@ input: | error(1) error("s") +output-flags: -fusion + output: | fusion(error(fusion(1::(int64|string),)),) fusion(error(fusion("s"::(int64|string),)),) @@ -88,6 +98,8 @@ input: | ["foo"] [null] +output-flags: -fusion + output: | fusion([fusion(1::(int64|string|null),),fusion(2::(int64|string|null),)],<[int64]>) fusion([fusion("foo"::(int64|string|null),)],<[string]>) @@ -102,6 +114,8 @@ input: | {a:["foo"]} {a:[null]} +output-flags: -fusion + output: | {a:fusion([fusion(1::(int64|string|null),),fusion(2::(int64|string|null),)],<[int64]>)} {a:fusion([fusion("foo"::(int64|string|null),)],<[string]>)} @@ -137,6 +151,8 @@ input: | type er2=error(er1) error(1)::er2 +output-flags: -fusion + output: | type a1=int64 type a2=[a1] diff --git a/sio/supio/writer.go b/sio/supio/writer.go index dfb2e392db..34b8bdd2a8 100644 --- a/sio/supio/writer.go +++ b/sio/supio/writer.go @@ -4,6 +4,8 @@ import ( "io" "github.com/brimdata/super" + "github.com/brimdata/super/runtime/sam/expr" + "github.com/brimdata/super/runtime/sam/expr/function" "github.com/brimdata/super/sbuf" "github.com/brimdata/super/sup" "github.com/brimdata/super/vector" @@ -12,17 +14,24 @@ import ( type Writer struct { writer io.WriteCloser formatter *sup.StreamFormatter + defuse expr.Function } type WriterOpts struct { ColorDisabled bool + Fusion bool Pretty int } func NewWriter(w io.WriteCloser, opts WriterOpts) *Writer { + var defuse expr.Function + if !opts.Fusion { + defuse = function.NewDefuse(super.NewContext()) + } return &Writer{ formatter: sup.NewStreamFormatter(opts.Pretty, opts.ColorDisabled), writer: w, + defuse: defuse, } } @@ -35,6 +44,9 @@ func (w *Writer) Close() 
error { } func (w *Writer) Write(val super.Value) error { + if w.defuse != nil { + val = w.defuse.Call([]super.Value{val}) + } if _, err := io.WriteString(w.writer, w.formatter.FormatValue(val)); err != nil { return err }