elements can have.
+//
+// - align: Obsolete since HTML5
+// - bgcolor: Obsolete since HTML5
+// - char: Obsolete since HTML5
+// - charoff: Obsolete since HTML5
+// - valign: Obsolete since HTML5.
+var TableRowAttributeFilter = html.GlobalAttributeFilter.ExtendString(`align,bgcolor,char,charoff,valign`)
+
+func (r *TableHTMLRenderer) renderTableRow(
+ w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
+ if entering {
+ _, _ = w.WriteString("
\n")
+ } else {
+ _, _ = w.WriteString("
\n")
+ if n.Parent().LastChild() == n {
+ _, _ = w.WriteString("\n")
+ }
+ }
+ return gast.WalkContinue, nil
+}
+
+// TableThCellAttributeFilter defines attribute names which table <th> cells can have.
+//
+// - abbr: [OK] Contains a short abbreviated description of the cell's content [NOT OK in <td>]
+// - align: Obsolete since HTML5
+// - axis: Obsolete since HTML5
+// - bgcolor: Not Standardized
+// - char: Obsolete since HTML5
+// - charoff: Obsolete since HTML5
+// - colspan: [OK] Number of columns that the cell is to span
+// - headers: [OK] This attribute contains a list of space-separated strings,
+// each corresponding to the id attribute of the <th> elements that apply to this element
+// - height: Deprecated since HTML4. Obsolete since HTML5
+// - rowspan: [OK] Number of rows that the cell is to span
+// - scope: [OK] This enumerated attribute defines the cells that the header
+// (defined in the <th>) element relates to [NOT OK in <td>]
+// - valign: Obsolete since HTML5
+// - width: Deprecated since HTML4. Obsolete since HTML5.
+var TableThCellAttributeFilter = html.GlobalAttributeFilter.ExtendString(`abbr,align,axis,bgcolor,char,charoff,colspan,headers,height,rowspan,scope,valign,width`) // nolint:lll
+
+// TableTdCellAttributeFilter defines attribute names which table <td> cells can have.
+//
+// - abbr: Obsolete since HTML5. [OK in <th>]
+// - align: Obsolete since HTML5
+// - axis: Obsolete since HTML5
+// - bgcolor: Not Standardized
+// - char: Obsolete since HTML5
+// - charoff: Obsolete since HTML5
+// - colspan: [OK] Number of columns that the cell is to span
+// - headers: [OK] This attribute contains a list of space-separated strings, each corresponding
+// to the id attribute of the <th> elements that apply to this element
+// - height: Deprecated since HTML4. Obsolete since HTML5
+// - rowspan: [OK] Number of rows that the cell is to span
+// - scope: Obsolete since HTML5. [OK in <th>]
+// - valign: Obsolete since HTML5
+// - width: Deprecated since HTML4. Obsolete since HTML5.
+var TableTdCellAttributeFilter = html.GlobalAttributeFilter.ExtendString(`abbr,align,axis,bgcolor,char,charoff,colspan,headers,height,rowspan,scope,valign,width`) // nolint: lll
+
+// renderTableCell writes the opening or closing tag of a table cell.
+//
+// Cells whose parent is a table header are rendered as <th>, all others as
+// <td>. When the cell has an alignment it is emitted either as an align
+// attribute or as a text-align style declaration, depending on
+// TableConfig.TableCellAlignMethod; TableCellAlignDefault resolves to the
+// attribute form when Config.XHTML is set and to the style form otherwise.
+// Write errors are deliberately discarded.
+func (r *TableHTMLRenderer) renderTableCell(
+	w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
+	n := node.(*ast.TableCell)
+	tag := "td"
+	if n.Parent().Kind() == ast.KindTableHeader {
+		tag = "th"
+	}
+	if !entering {
+		// Closing tag: the "</" prefix is required, otherwise the output
+		// would contain a bare "td>" / "th>".
+		_, _ = fmt.Fprintf(w, "</%s>\n", tag)
+		return gast.WalkContinue, nil
+	}
+
+	_, _ = fmt.Fprintf(w, "<%s", tag)
+	if n.Alignment != ast.AlignNone {
+		method := r.TableConfig.TableCellAlignMethod
+		if method == TableCellAlignDefault {
+			method = TableCellAlignStyle
+			if r.Config.XHTML {
+				method = TableCellAlignAttribute
+			}
+		}
+		switch method {
+		case TableCellAlignAttribute:
+			// An explicit align attribute on the node wins; it is rendered
+			// with the other attributes below.
+			if _, overridden := n.AttributeString("align"); !overridden {
+				_, _ = fmt.Fprintf(w, ` align="%s"`, n.Alignment.String())
+			}
+		case TableCellAlignStyle:
+			var cob util.CopyOnWriteBuffer
+			if existing, ok := n.AttributeString("style"); ok {
+				switch s := existing.(type) {
+				case []byte:
+					cob = util.NewCopyOnWriteBuffer(s)
+				case string:
+					cob = util.NewCopyOnWriteBuffer([]byte(s))
+				}
+				cob.AppendByte(';')
+			}
+			cob.AppendString("text-align:" + n.Alignment.String())
+			// Store the merged style on the node so it is written by the
+			// attribute rendering step that follows.
+			n.SetAttributeString("style", cob.Bytes())
+		}
+	}
+	if n.Attributes() != nil {
+		if tag == "td" {
+			html.RenderAttributes(w, n, TableTdCellAttributeFilter) // <td>
+		} else {
+			html.RenderAttributes(w, n, TableThCellAttributeFilter) // <th>
+		}
+	}
+	_ = w.WriteByte('>')
+	return gast.WalkContinue, nil
+}
+
+type table struct {
+ options []TableOption
+}
+
+// Table is an extension that allows you to use GFM tables.
+var Table = &table{
+ options: []TableOption{},
+}
+
+// NewTable returns a goldmark.Extender for GFM tables that carries opts;
+// they are handed to the table HTML renderer when the extension is applied.
+func NewTable(opts ...TableOption) goldmark.Extender {
+	ext := new(table)
+	ext.options = opts
+	return ext
+}
+
+// Extend registers the table paragraph transformer (priority 200) and the
+// table AST transformer (priority 0) on m's parser, and a table HTML renderer
+// built from e.options (priority 500) on m's renderer.
+func (e *table) Extend(m goldmark.Markdown) {
+	p := m.Parser()
+	p.AddOptions(
+		parser.WithParagraphTransformers(util.Prioritized(NewTableParagraphTransformer(), 200)),
+		parser.WithASTTransformers(util.Prioritized(defaultTableASTTransformer, 0)),
+	)
+
+	rd := m.Renderer()
+	tableRenderer := util.Prioritized(NewTableHTMLRenderer(e.options...), 500)
+	rd.AddOptions(renderer.WithNodeRenderers(tableRenderer))
+}
diff --git a/pkg/goldmark/extension/table_options_test.go b/pkg/goldmark/extension/table_options_test.go
new file mode 100644
index 000000000..d008f9814
--- /dev/null
+++ b/pkg/goldmark/extension/table_options_test.go
@@ -0,0 +1,160 @@
+package extension_test
+
+// Coverage for the extension Table option dispatchers — each
+// With*Option type's SetTableOption and SetConfig methods, plus
+// the three TableCellAlignMethod variants and the
+// NewTableASTTransformer constructor reachable only through the
+// Extender path that's already wired by extension.Table.
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/extension"
+ "github.com/yuin/goldmark/renderer"
+ "github.com/yuin/goldmark/renderer/html"
+ "github.com/yuin/goldmark/util"
+)
+
+const tableOptSrc = "| h1 | h2 | h3 |\n|:---|:--:|---:|\n| a | b | c |\n"
+
+func renderTableWith(t *testing.T, opts ...extension.TableOption) string {
+ t.Helper()
+ r := renderer.NewRenderer(
+ renderer.WithNodeRenderers(
+ util.Prioritized(html.NewRenderer(), 1000),
+ util.Prioritized(extension.NewTableHTMLRenderer(opts...), 500),
+ ),
+ )
+ md := goldmark.New(
+ goldmark.WithExtensions(extension.Table),
+ goldmark.WithRenderer(r),
+ )
+ var buf bytes.Buffer
+ if err := md.Convert([]byte(tableOptSrc), &buf); err != nil {
+ t.Fatalf("Convert: %v", err)
+ }
+ return buf.String()
+}
+
+func TestTable_WithCellAlignMethod_Default(t *testing.T) {
+	// The default method is expected to put a text-align declaration on
+	// the aligned cells.
+	method := extension.WithTableCellAlignMethod(extension.TableCellAlignDefault)
+	if out := renderTableWith(t, method); !strings.Contains(out, "text-align:") {
+		t.Errorf("default cell-align should emit style=text-align: in output: %q", out)
+	}
+}
+
+func TestTable_WithCellAlignMethod_Attribute(t *testing.T) {
+	// The attribute method is expected to render alignment as align="...".
+	method := extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)
+	if out := renderTableWith(t, method); !strings.Contains(out, "align=") {
+		t.Errorf("Attribute method should emit align= in output: %q", out)
+	}
+}
+
+func TestTable_WithCellAlignMethod_Style(t *testing.T) {
+	// The style method is expected to render alignment through style="...".
+	method := extension.WithTableCellAlignMethod(extension.TableCellAlignStyle)
+	if out := renderTableWith(t, method); !strings.Contains(out, "style=") {
+		t.Errorf("Style method should emit style= in output: %q", out)
+	}
+}
+
+func TestTable_DefaultExtenderPath(t *testing.T) {
+	// Build a Markdown with only extension.Table so that the parser and
+	// renderer pieces in play are the ones its Extend method installs, then
+	// check that a table element is actually rendered.
+	md := goldmark.New(goldmark.WithExtensions(extension.Table))
+	var buf bytes.Buffer
+	if err := md.Convert([]byte(tableOptSrc), &buf); err != nil {
+		t.Fatalf("Convert: %v", err)
+	}
+	if !strings.Contains(buf.String(), "<table>") {
+		t.Errorf("default Extender path produced no <table>: %q", buf.String())
+	}
+}
+
+func TestTable_OptionsAsRendererOptions(t *testing.T) {
+	// Table options are asserted to renderer.Option here so they can be
+	// applied through AddOptions after the renderer has been constructed.
+	r := renderer.NewRenderer(renderer.WithNodeRenderers(
+		util.Prioritized(html.NewRenderer(), 1000),
+		util.Prioritized(extension.NewTableHTMLRenderer(), 500),
+	))
+	r.AddOptions(
+		extension.WithTableCellAlignMethod(extension.TableCellAlignStyle).(renderer.Option),
+		extension.WithTableHTMLOptions(html.WithUnsafe()).(renderer.Option),
+	)
+	md := goldmark.New(goldmark.WithExtensions(extension.Table), goldmark.WithRenderer(r))
+	var buf bytes.Buffer
+	if err := md.Convert([]byte(tableOptSrc), &buf); err != nil {
+		t.Fatalf("Convert: %v", err)
+	}
+	// The source has aligned columns, so the style method must leave a
+	// text-align declaration in the output.
+	if out := buf.String(); !strings.Contains(out, "text-align:") {
+		t.Errorf("style align method applied via AddOptions should emit text-align: %q", out)
+	}
+}
+
+func TestTable_PrefixParagraphSplit(t *testing.T) {
+	// When a paragraph contains a non-table prefix line, then
+	// the table header, then the delimiter row, the table
+	// transformer slices off the prefix as a separate paragraph
+	// (else branch: trim last newline).
+	src := "prefix paragraph line\n| h1 | h2 |\n|---|---|\n| a | b |\n"
+	md := goldmark.New(goldmark.WithExtensions(extension.Table))
+	var buf bytes.Buffer
+	if err := md.Convert([]byte(src), &buf); err != nil {
+		t.Fatalf("Convert: %v", err)
+	}
+	out := buf.String()
+	// Both halves of the split must survive: the prefix as its own
+	// paragraph (without a trailing newline) and the table itself.
+	if !strings.Contains(out, "<p>prefix paragraph line</p>") {
+		t.Errorf("prefix paragraph missing in output: %q", out)
+	}
+	if !strings.Contains(out, "<table>") {
+		t.Errorf("table missing in output: %q", out)
+	}
+}
+
+func TestTable_ColumnMismatchRejected(t *testing.T) {
+	// tableParagraphTransformer's "header.ChildCount() !=
+	// len(alignments)" branch fires when the header row has a
+	// different column count than the delimiter row. The
+	// paragraph stays a paragraph (no table). The remaining inputs
+	// have no usable delimiter row at all.
+	srcs := []string{
+		"| a |\n|---|---|---|\n| b |\n",            // 1 vs 3 cols
+		"| h1 | h2 | h3 |\n|---|\n| a | b | c |\n", // 3 vs 1 col
+		"| h |\n| not delim |\n",                   // 2nd line not a delim
+		"single line paragraph\n",                  // 1 line only
+		"line one\nline two\nline three\n",         // no delim row
+	}
+	for _, src := range srcs {
+		md := goldmark.New(goldmark.WithExtensions(extension.Table))
+		var buf bytes.Buffer
+		if err := md.Convert([]byte(src), &buf); err != nil {
+			t.Fatalf("Convert(%q): %v", src, err)
+		}
+		// None of these inputs may be promoted to a table.
+		if out := buf.String(); strings.Contains(out, "<table") {
+			t.Errorf("Convert(%q) unexpectedly produced a table: %q", src, out)
+		}
+	}
+}
+
+func TestNewTable_Extender(t *testing.T) {
+	// NewTable returns an Extender; plug it in with explicit
+	// options and check that the option reaches the renderer.
+	ext := extension.NewTable(extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute))
+	md := goldmark.New(goldmark.WithExtensions(ext))
+	var buf bytes.Buffer
+	if err := md.Convert([]byte(tableOptSrc), &buf); err != nil {
+		t.Fatalf("Convert: %v", err)
+	}
+	// tableOptSrc has aligned columns, so the attribute method must show
+	// up as align="..." on the cells.
+	if out := buf.String(); !strings.Contains(out, "align=") {
+		t.Errorf("NewTable with the attribute align method should emit align=: %q", out)
+	}
+}
+
+func TestTable_EscapedPipeInCell_DrivesASTTransformer(t *testing.T) {
+	// `\|` inside a cell — and especially inside a code span — is
+	// what makes tableASTTransformer.Transform do real work: it
+	// rewrites the inline AST so the pipe becomes a literal rather
+	// than a column delimiter. Without this case the transformer
+	// returns immediately on the lst==nil branch.
+	src := "| h1 | h2 |\n|----|----|\n| `a\\|b` | c |\n"
+	md := goldmark.New(goldmark.WithExtensions(extension.Table))
+	var buf bytes.Buffer
+	if err := md.Convert([]byte(src), &buf); err != nil {
+		t.Fatalf("Convert: %v", err)
+	}
+	out := buf.String()
+	if !strings.Contains(out, "<table>") {
+		t.Errorf("table missing in output: %q", out)
+	}
+	// An empty needle would match any output, so look for the real tag.
+	if !strings.Contains(out, "<code>") {
+		t.Errorf("code span missing in output: %q", out)
+	}
+}
diff --git a/pkg/goldmark/extension/tasklist.go b/pkg/goldmark/extension/tasklist.go
new file mode 100644
index 000000000..4467ebfff
--- /dev/null
+++ b/pkg/goldmark/extension/tasklist.go
@@ -0,0 +1,120 @@
+package extension
+
+import (
+ "regexp"
+
+ "github.com/yuin/goldmark"
+ gast "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/extension/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/renderer"
+ "github.com/yuin/goldmark/renderer/html"
+ "github.com/yuin/goldmark/text"
+ "github.com/yuin/goldmark/util"
+)
+
+var taskListRegexp = regexp.MustCompile(`^\[([\sxX])\]\s*`)
+
+type taskCheckBoxParser struct {
+}
+
+var defaultTaskCheckBoxParser = &taskCheckBoxParser{}
+
+// NewTaskCheckBoxParser returns an InlineParser that can parse
+// checkboxes in list items.
+// This parser must take precedence over the parser.LinkParser.
+// Every call returns the same shared instance, defaultTaskCheckBoxParser.
+func NewTaskCheckBoxParser() parser.InlineParser {
+ return defaultTaskCheckBoxParser
+}
+
+// Trigger returns the single byte '[', the opening bracket of a checkbox marker.
+func (s *taskCheckBoxParser) Trigger() []byte {
+ return []byte{'['}
+}
+
+// Parse returns a TaskCheckBox node when the current line starts with a
+// checkbox marker ("[ ]", "[x]" or "[X]") at the very beginning of a list
+// item, and nil otherwise. On success the reader is advanced past the marker
+// and any whitespace that follows it.
+func (s *taskCheckBoxParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
+	// Required AST shape:
+	//   - List
+	//     - ListItem  : parent.Parent()
+	//       - TextBlock : parent (first child of the item, still empty)
+	//         (current line)
+	item := parent.Parent()
+	if item == nil || item.FirstChild() != parent || parent.HasChildren() {
+		return nil
+	}
+	if _, isListItem := item.(*gast.ListItem); !isListItem {
+		return nil
+	}
+
+	line, _ := block.PeekLine()
+	loc := taskListRegexp.FindSubmatchIndex(line)
+	if loc == nil {
+		return nil
+	}
+	// loc[2] is the start of the one-byte capture group holding the mark.
+	mark := line[loc[2]]
+	block.Advance(loc[1])
+	return ast.NewTaskCheckBox(mark == 'x' || mark == 'X')
+}
+
+// CloseBlock is a no-op for this parser.
+func (s *taskCheckBoxParser) CloseBlock(parent gast.Node, pc parser.Context) {
+ // nothing to do
+}
+
+// TaskCheckBoxHTMLRenderer is a renderer.NodeRenderer implementation that
+// renders checkboxes in list items.
+type TaskCheckBoxHTMLRenderer struct {
+ html.Config
+}
+
+// NewTaskCheckBoxHTMLRenderer builds a TaskCheckBoxHTMLRenderer whose Config
+// starts as html.NewConfig() and then has each of opts applied in order.
+func NewTaskCheckBoxHTMLRenderer(opts ...html.Option) renderer.NodeRenderer {
+	tr := new(TaskCheckBoxHTMLRenderer)
+	tr.Config = html.NewConfig()
+	for i := range opts {
+		opts[i].SetHTMLOption(&tr.Config)
+	}
+	return tr
+}
+
+// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs.
+// It registers renderTaskCheckBox for ast.KindTaskCheckBox nodes.
+func (r *TaskCheckBoxHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
+ reg.Register(ast.KindTaskCheckBox, r.renderTaskCheckBox)
+}
+
+func (r *TaskCheckBoxHTMLRenderer) renderTaskCheckBox(
+ w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
+ if !entering {
+ return gast.WalkContinue, nil
+ }
+ n := node.(*ast.TaskCheckBox)
+
+ if n.IsChecked {
+ _, _ = w.WriteString(` ")
+ } else {
+ _, _ = w.WriteString("> ")
+ }
+ return gast.WalkContinue, nil
+}
+
+type taskList struct {
+}
+
+// TaskList is an extension that allows you to use GFM task lists.
+var TaskList = &taskList{}
+
+// Extend registers the task checkbox inline parser (priority 0) on m's
+// parser and the task checkbox HTML renderer (priority 500) on m's renderer.
+func (e *taskList) Extend(m goldmark.Markdown) {
+	p := m.Parser()
+	checkBoxParser := util.Prioritized(NewTaskCheckBoxParser(), 0)
+	p.AddOptions(parser.WithInlineParsers(checkBoxParser))
+
+	rd := m.Renderer()
+	checkBoxRenderer := util.Prioritized(NewTaskCheckBoxHTMLRenderer(), 500)
+	rd.AddOptions(renderer.WithNodeRenderers(checkBoxRenderer))
+}
diff --git a/pkg/goldmark/go.mod b/pkg/goldmark/go.mod
new file mode 100644
index 000000000..c0423114d
--- /dev/null
+++ b/pkg/goldmark/go.mod
@@ -0,0 +1,3 @@
+module github.com/yuin/goldmark
+
+go 1.22
diff --git a/pkg/goldmark/go.sum b/pkg/goldmark/go.sum
new file mode 100644
index 000000000..e69de29bb
diff --git a/pkg/goldmark/markdown.go b/pkg/goldmark/markdown.go
new file mode 100644
index 000000000..402421507
--- /dev/null
+++ b/pkg/goldmark/markdown.go
@@ -0,0 +1,137 @@
+// Package goldmark implements a Markdown parser. mdsmith vendors this
+// fork to thread a per-parser BlockReader (plan 197) and absorb the
+// four structural allocators (plan 198) without rebuilding goldmark
+// from scratch. The package layout is identical to upstream so
+// every consumer import path stays `github.com/yuin/goldmark/...`.
+package goldmark
+
+import (
+ "io"
+
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/renderer"
+ "github.com/yuin/goldmark/renderer/html"
+ "github.com/yuin/goldmark/text"
+ "github.com/yuin/goldmark/util"
+)
+
+// DefaultParser builds a Parser from the parser package's default block
+// parsers, inline parsers and paragraph transformers.
+func DefaultParser() parser.Parser {
+	blocks := parser.WithBlockParsers(parser.DefaultBlockParsers()...)
+	inlines := parser.WithInlineParsers(parser.DefaultInlineParsers()...)
+	transformers := parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...)
+	return parser.NewParser(blocks, inlines, transformers)
+}
+
+// DefaultRenderer builds a Renderer whose only node renderer is the HTML
+// renderer, registered with priority 1000.
+func DefaultRenderer() renderer.Renderer {
+	htmlRenderer := util.Prioritized(html.NewRenderer(), 1000)
+	return renderer.NewRenderer(renderer.WithNodeRenderers(htmlRenderer))
+}
+
+var defaultMarkdown = New()
+
+// Convert parses source as UTF-8 Markdown and writes the rendered output to
+// w, using the package-level default Markdown instance. mdsmith does not
+// call this — it parses only — but the upstream extension Extend methods
+// register HTML node renderers, so the rendering pipeline stays wired.
+func Convert(source []byte, w io.Writer, opts ...parser.ParseOption) error {
+	md := defaultMarkdown
+	return md.Convert(source, w, opts...)
+}
+
+// A Markdown converts Markdown text to a desired format.
+type Markdown interface {
+ // Convert reads UTF-8 Markdown from source, parses it, and
+ // writes rendered output to w.
+ Convert(source []byte, writer io.Writer, opts ...parser.ParseOption) error
+
+ // Parser returns the Parser that will be used to build the AST.
+ Parser() parser.Parser
+
+ // SetParser swaps the underlying Parser.
+ SetParser(parser.Parser)
+
+ // Renderer returns the Renderer that will be used to emit output.
+ Renderer() renderer.Renderer
+
+ // SetRenderer swaps the underlying Renderer.
+ SetRenderer(renderer.Renderer)
+}
+
+// Option is a functional option type for Markdown objects.
+type Option func(*markdown)
+
+// WithExtensions returns an Option that appends ext to the Markdown's list
+// of extensions.
+func WithExtensions(ext ...Extender) Option {
+	apply := func(m *markdown) {
+		m.extensions = append(m.extensions, ext...)
+	}
+	return apply
+}
+
+// WithParser returns an Option that replaces the Markdown's parser with p.
+func WithParser(p parser.Parser) Option {
+	apply := func(m *markdown) {
+		m.parser = p
+	}
+	return apply
+}
+
+// WithParserOptions returns an Option that adds opts to whichever parser the
+// Markdown holds at the moment the Option is applied.
+func WithParserOptions(opts ...parser.Option) Option {
+	apply := func(m *markdown) {
+		m.parser.AddOptions(opts...)
+	}
+	return apply
+}
+
+// WithRenderer returns an Option that replaces the Markdown's renderer with r.
+func WithRenderer(r renderer.Renderer) Option {
+	apply := func(m *markdown) {
+		m.renderer = r
+	}
+	return apply
+}
+
+// WithRendererOptions returns an Option that adds opts to whichever renderer
+// the Markdown holds at the moment the Option is applied.
+func WithRendererOptions(opts ...renderer.Option) Option {
+	apply := func(m *markdown) {
+		m.renderer.AddOptions(opts...)
+	}
+	return apply
+}
+
+type markdown struct {
+ parser parser.Parser
+ renderer renderer.Renderer
+ extensions []Extender
+}
+
+// New builds a Markdown that starts from DefaultParser and DefaultRenderer,
+// applies options in the order given, and finally calls Extend on every
+// registered extension before returning.
+func New(options ...Option) Markdown {
+	md := new(markdown)
+	md.parser = DefaultParser()
+	md.renderer = DefaultRenderer()
+	md.extensions = []Extender{}
+
+	for _, apply := range options {
+		apply(md)
+	}
+	for _, ext := range md.extensions {
+		ext.Extend(md)
+	}
+	return md
+}
+
+// Convert parses source with m's parser and renders the resulting document
+// to writer with m's renderer, returning the renderer's error.
+func (m *markdown) Convert(source []byte, writer io.Writer, opts ...parser.ParseOption) error {
+	doc := m.parser.Parse(text.NewReader(source), opts...)
+	return m.renderer.Render(writer, source, doc)
+}
+
+// Parser returns the parser currently held by m.
+func (m *markdown) Parser() parser.Parser {
+	return m.parser
+}
+
+// SetParser replaces the parser held by m with v.
+func (m *markdown) SetParser(v parser.Parser) {
+	m.parser = v
+}
+
+// Renderer returns the renderer currently held by m.
+func (m *markdown) Renderer() renderer.Renderer {
+	return m.renderer
+}
+
+// SetRenderer replaces the renderer held by m with v.
+func (m *markdown) SetRenderer(v renderer.Renderer) {
+	m.renderer = v
+}
+
+// An Extender hooks additional parsers/renderers onto a Markdown.
+type Extender interface {
+ Extend(Markdown)
+}
diff --git a/pkg/goldmark/markdown_test.go b/pkg/goldmark/markdown_test.go
new file mode 100644
index 000000000..bc5c39bbf
--- /dev/null
+++ b/pkg/goldmark/markdown_test.go
@@ -0,0 +1,70 @@
+package goldmark_test
+
+// Coverage for the top-level goldmark.Convert helper plus the
+// Markdown setters and parser-option dispatchers.
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/parser"
+)
+
+func TestConvert_TopLevel(t *testing.T) {
+	// Convert() is the package-level convenience wrapper around
+	// defaultMarkdown.Convert(). Drive a small markdown sample
+	// through it and check that the heading is rendered.
+	var buf bytes.Buffer
+	if err := goldmark.Convert([]byte("# Title\n\nbody\n"), &buf); err != nil {
+		t.Fatalf("Convert: %v", err)
+	}
+	out := buf.String()
+	if !strings.Contains(out, "<h1>Title</h1>") {
+		t.Errorf("Convert output missing <h1>Title</h1>: %q", out)
+	}
+}
+
+func TestNew_WithParserAndOptions(t *testing.T) {
+	// WithParser swaps the parser entirely; WithParserOptions
+	// passes parser-level options at New time. WithParser comes first so
+	// the option lands on the custom parser rather than the default one.
+	customParser := parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+	)
+	md := goldmark.New(
+		goldmark.WithParser(customParser),
+		goldmark.WithParserOptions(parser.WithAttribute()),
+	)
+	var buf bytes.Buffer
+	if err := md.Convert([]byte("# Title {#id}\n"), &buf); err != nil {
+		t.Fatalf("Convert: %v", err)
+	}
+	// With the attribute option active, {#id} must become an id attribute
+	// on the heading instead of staying in the heading text.
+	if out := buf.String(); !strings.Contains(out, `id="id"`) {
+		t.Errorf("heading attribute was not applied: %q", out)
+	}
+}
+
+func TestMarkdown_SetParserAndSetRenderer(t *testing.T) {
+ // SetParser replaces the underlying parser after construction;
+ // SetRenderer does the same for the renderer.
+ md := goldmark.New()
+ originalParser := md.Parser()
+ newParser := parser.NewParser(
+ parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+ parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+ parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...),
+ )
+ md.SetParser(newParser)
+ if md.Parser() == originalParser {
+ t.Error("SetParser did not replace the underlying parser")
+ }
+ originalRenderer := md.Renderer()
+ md.SetRenderer(goldmark.DefaultRenderer())
+ if md.Renderer() == originalRenderer {
+ t.Error("SetRenderer did not replace the underlying renderer")
+ }
+ // Convert must still work after both swaps.
+ var buf bytes.Buffer
+ if err := md.Convert([]byte("# X\n"), &buf); err != nil {
+ t.Fatalf("Convert after swap: %v", err)
+ }
+}
diff --git a/pkg/goldmark/parser/attribute.go b/pkg/goldmark/parser/attribute.go
new file mode 100644
index 000000000..5647a5155
--- /dev/null
+++ b/pkg/goldmark/parser/attribute.go
@@ -0,0 +1,329 @@
+package parser
+
+import (
+ "bytes"
+ "io"
+ "strconv"
+
+ "github.com/yuin/goldmark/text"
+ "github.com/yuin/goldmark/util"
+)
+
+var attrNameID = []byte("id")
+var attrNameClass = []byte("class")
+
+// An Attribute is an attribute of the markdown elements.
+type Attribute struct {
+ Name []byte
+ Value any
+}
+
+// An Attributes is a collection of attributes.
+type Attributes []Attribute
+
+// Find returns the value of the first attribute whose name equals name,
+// together with true. It returns (nil, false) when there is no such attribute.
+func (as Attributes) Find(name []byte) (any, bool) {
+	for i := range as {
+		if bytes.Equal(as[i].Name, name) {
+			return as[i].Value, true
+		}
+	}
+	return nil, false
+}
+
+// findUpdate replaces the value of the first attribute named name with
+// cb(current value) and reports whether such an attribute was found.
+func (as Attributes) findUpdate(name []byte, cb func(v any) any) bool {
+	for i := range as {
+		if !bytes.Equal(as[i].Name, name) {
+			continue
+		}
+		as[i].Value = cb(as[i].Value)
+		return true
+	}
+	return false
+}
+
+// ParseAttributes parses a brace-delimited attribute block ("{...}") at the
+// reader's current position into an Attributes slice.
+// It returns the parsed attributes and true on success. On failure it
+// returns nil and false and restores the reader to the position it had on
+// entry. Repeated class attributes are merged into one space-separated value.
+func ParseAttributes(reader text.Reader) (Attributes, bool) {
+	startLine, startPos := reader.Position()
+	reader.SkipSpaces()
+	if reader.Peek() != '{' {
+		reader.SetPosition(startLine, startPos)
+		return nil, false
+	}
+	reader.Advance(1)
+
+	parsed := Attributes{}
+	for reader.Peek() != '}' {
+		attr, ok := parseAttribute(reader)
+		if !ok {
+			reader.SetPosition(startLine, startPos)
+			return nil, false
+		}
+
+		merged := false
+		if bytes.Equal(attr.Name, attrNameClass) {
+			// Fold this class into an already collected class attribute.
+			merged = parsed.findUpdate(attrNameClass, func(prev any) any {
+				old, added := prev.([]byte), attr.Value.([]byte)
+				joined := make([]byte, 0, len(old)+1+len(added))
+				joined = append(joined, old...)
+				joined = append(joined, ' ')
+				return append(joined, added...)
+			})
+		}
+		if !merged {
+			parsed = append(parsed, attr)
+		}
+
+		// An optional comma may separate attributes.
+		reader.SkipSpaces()
+		if reader.Peek() == ',' {
+			reader.Advance(1)
+			reader.SkipSpaces()
+		}
+	}
+	reader.Advance(1) // consume the closing brace
+	return parsed, true
+}
+
+// parseAttribute parses a single attribute at the reader's position.
+// Two forms are handled: the shorthands "#value" (stored under the name id)
+// and ".value" (stored under the name class), and the general "name=value"
+// form. It returns the attribute and true on success, or a zero Attribute
+// and false on failure; the reader position is not restored on failure.
+func parseAttribute(reader text.Reader) (Attribute, bool) {
+ reader.SkipSpaces()
+ c := reader.Peek()
+ if c == '#' || c == '.' {
+ reader.Advance(1)
+ line, _ := reader.PeekLine()
+ i := 0
+ // HTML5 allows any kind of characters as id, but XHTML restricts characters for id.
+ // CommonMark is basically defined for XHTML(even though it is legacy).
+ // So we restrict id characters.
+ for ; i < len(line) && !util.IsSpace(line[i]) &&
+ (!util.IsPunct(line[i]) || line[i] == '_' ||
+ line[i] == '-' || line[i] == ':' || line[i] == '.'); i++ {
+ }
+ name := attrNameClass
+ if c == '#' {
+ name = attrNameID
+ }
+ reader.Advance(i)
+ // The value is a sub-slice of the peeked line, not a copy.
+ return Attribute{Name: name, Value: line[0:i]}, true
+ }
+ line, _ := reader.PeekLine()
+ if len(line) == 0 {
+ return Attribute{}, false
+ }
+ // A name must start with an ASCII letter, '_' or ':'.
+ c = line[0]
+ if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+ c == '_' || c == ':') {
+ return Attribute{}, false
+ }
+ // Subsequent name characters may also be digits, '.' or '-'.
+ i := 0
+ for ; i < len(line); i++ {
+ c = line[i]
+ if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ c == '_' || c == ':' || c == '.' || c == '-') {
+ break
+ }
+ }
+ name := line[:i]
+ reader.Advance(i)
+ reader.SkipSpaces()
+ c = reader.Peek()
+ if c != '=' {
+ return Attribute{}, false
+ }
+ reader.Advance(1)
+ reader.SkipSpaces()
+ value, ok := parseAttributeValue(reader)
+ if !ok {
+ return Attribute{}, false
+ }
+ // A class given in name=value form must have a []byte value; other value
+ // types are rejected.
+ if bytes.Equal(name, attrNameClass) {
+ if _, ok = value.([]byte); !ok {
+ return Attribute{}, false
+ }
+ }
+ return Attribute{Name: name, Value: value}, true
+}
+
+// parseAttributeValue parses one attribute value at the reader's position
+// and dispatches on its first byte: '{' starts a nested attribute block,
+// '[' an array, '"' a quoted string, a sign or digit a number, and anything
+// else is handed to parseAttributeOthers.
+// It returns (value, true) on success and (nil, false) on failure.
+func parseAttributeValue(reader text.Reader) (any, bool) {
+	reader.SkipSpaces()
+	c := reader.Peek()
+	var value any
+	var ok bool
+	switch c {
+	case text.EOF:
+		// Nothing left to parse; fail with a nil value like every other
+		// failure path of this function.
+		return nil, false
+	case '{':
+		value, ok = ParseAttributes(reader)
+	case '[':
+		value, ok = parseAttributeArray(reader)
+	case '"':
+		value, ok = parseAttributeString(reader)
+	default:
+		if c == '-' || c == '+' || util.IsNumeric(c) {
+			value, ok = parseAttributeNumber(reader)
+		} else {
+			value, ok = parseAttributeOthers(reader)
+		}
+	}
+	if !ok {
+		// Return an untyped nil: the helpers above return typed nils
+		// (Attributes, []any) that would be non-nil once stored in an any.
+		return nil, false
+	}
+	return value, true
+}
+
+// parseAttributeArray parses a bracketed list of values; the reader must be
+// positioned on the opening '['. It returns the parsed values and true once
+// the closing ']' has been consumed, or nil and false if an element fails to
+// parse.
+func parseAttributeArray(reader text.Reader) ([]any, bool) {
+ reader.Advance(1) // skip [
+ ret := []any{}
+ for i := 0; ; i++ {
+ // c is sampled before a separator is consumed and is not refreshed afterwards.
+ c := reader.Peek()
+ comma := false
+ // A comma before any element other than the first is consumed when
+ // present; its absence is not treated as an error here.
+ if i != 0 && c == ',' {
+ reader.Advance(1)
+ comma = true
+ }
+ if c == ']' {
+ if !comma {
+ reader.Advance(1)
+ return ret, true
+ }
+ // NOTE(review): comma is only set when c is ',', so this return looks
+ // unreachable inside the c == ']' branch — confirm.
+ return nil, false
+ }
+ reader.SkipSpaces()
+ value, ok := parseAttributeValue(reader)
+ if !ok {
+ return nil, false
+ }
+ ret = append(ret, value)
+ reader.SkipSpaces()
+ }
+}
+
+// parseAttributeString parses a double-quoted string; the reader must be
+// positioned on the opening quote. The escapes \" \/ \\ \b \f \n \r \t are
+// decoded; a backslash before any other byte is kept literally. The string
+// must be closed on the current line: on success the decoded bytes are
+// returned and the reader is advanced past the closing quote, otherwise nil
+// and false are returned.
+func parseAttributeString(reader text.Reader) ([]byte, bool) {
+	reader.Advance(1) // skip the opening quote
+	line, _ := reader.PeekLine()
+	var out bytes.Buffer
+	for pos, end := 0, len(line); pos < end; {
+		ch := line[pos]
+		switch {
+		case ch == '\\' && pos != end-1:
+			// consumed is the number of input bytes this escape used up.
+			consumed := 2
+			switch esc := line[pos+1]; esc {
+			case '"', '/', '\\':
+				out.WriteByte(esc)
+			case 'b':
+				out.WriteByte('\b')
+			case 'f':
+				out.WriteByte('\f')
+			case 'n':
+				out.WriteByte('\n')
+			case 'r':
+				out.WriteByte('\r')
+			case 't':
+				out.WriteByte('\t')
+			default:
+				// Unknown escape: keep the backslash and look at the
+				// following byte again on the next iteration.
+				out.WriteByte('\\')
+				consumed = 1
+			}
+			pos += consumed
+		case ch == '"':
+			reader.Advance(pos + 1)
+			return out.Bytes(), true
+		default:
+			out.WriteByte(ch)
+			pos++
+		}
+	}
+	return nil, false
+}
+
+// scanAttributeDecimal copies the run of numeric bytes at the reader's
+// position into w, advancing the reader past each one, and stops at the
+// first non-numeric byte. Errors from w are ignored.
+func scanAttributeDecimal(reader text.Reader, w io.ByteWriter) {
+	for c := reader.Peek(); util.IsNumeric(c); c = reader.Peek() {
+		_ = w.WriteByte(c)
+		reader.Advance(1)
+	}
+}
+
+// parseAttributeNumber parses a number of the form
+// [+-]digits[.digits][(e|E)[+-]digits] at the reader's position and returns
+// it as a float64. It returns (0, false) when no digit follows the optional
+// sign or when the collected text is rejected by strconv.ParseFloat.
+func parseAttributeNumber(reader text.Reader) (float64, bool) {
+	sign := 1.0
+	switch reader.Peek() {
+	case '-':
+		sign = -1
+		reader.Advance(1)
+	case '+':
+		reader.Advance(1)
+	}
+
+	var digits bytes.Buffer
+	if !util.IsNumeric(reader.Peek()) {
+		return 0, false
+	}
+	scanAttributeDecimal(reader, &digits)
+	if digits.Len() == 0 {
+		return 0, false
+	}
+
+	// Optional fractional part.
+	if dot := reader.Peek(); dot == '.' {
+		digits.WriteByte(dot)
+		reader.Advance(1)
+		scanAttributeDecimal(reader, &digits)
+	}
+
+	// Optional exponent with its own optional sign.
+	if exp := reader.Peek(); exp == 'e' || exp == 'E' {
+		digits.WriteByte(exp)
+		reader.Advance(1)
+		if expSign := reader.Peek(); expSign == '-' || expSign == '+' {
+			digits.WriteByte(expSign)
+			reader.Advance(1)
+		}
+		scanAttributeDecimal(reader, &digits)
+	}
+
+	f, err := strconv.ParseFloat(digits.String(), 64)
+	if err != nil {
+		return 0, false
+	}
+	return sign * f, true
+}
+
+var bytesTrue = []byte("true")
+var bytesFalse = []byte("false")
+var bytesNull = []byte("null")
+
+// parseAttributeOthers parses a bare (unquoted) word at the reader's
+// position. The word must start with an ASCII letter, '_' or ':' and may
+// continue with letters, digits, '_', ':', '.' or '-'.
+// The words true, false and null are returned as the Go values true, false
+// and nil; any other word is returned as a []byte sub-slice of the current
+// line. It returns (nil, false) when there is no input left on the line or
+// the first byte cannot start a word.
+func parseAttributeOthers(reader text.Reader) (any, bool) {
+	line, _ := reader.PeekLine()
+	if len(line) == 0 {
+		// Guard the line[0] access below against an exhausted reader.
+		return nil, false
+	}
+	c := line[0]
+	if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+		c == '_' || c == ':') {
+		return nil, false
+	}
+	i := 0
+	for ; i < len(line); i++ {
+		c := line[i]
+		if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+			(c >= '0' && c <= '9') ||
+			c == '_' || c == ':' || c == '.' || c == '-') {
+			break
+		}
+	}
+	value := line[:i]
+	reader.Advance(i)
+	if bytes.Equal(value, bytesTrue) {
+		return true, true
+	}
+	if bytes.Equal(value, bytesFalse) {
+		return false, true
+	}
+	if bytes.Equal(value, bytesNull) {
+		return nil, true
+	}
+	return value, true
+}
diff --git a/pkg/goldmark/parser/attribute_test.go b/pkg/goldmark/parser/attribute_test.go
new file mode 100644
index 000000000..0ab0d851e
--- /dev/null
+++ b/pkg/goldmark/parser/attribute_test.go
@@ -0,0 +1,97 @@
+package parser
+
+// Coverage for the attribute syntax parser — `{#id .class
+// k=v k="quoted" k=123 k=[1,2,3]}`. Drives each leaf
+// parser: parseAttributeString, parseAttributeNumber,
+// parseAttributeArray, parseAttributeOthers, plus Find / findUpdate.
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/yuin/goldmark/text"
+)
+
+func TestParseAttributes_ValueShapes(t *testing.T) {
+	// One input per value-type branch of parseAttributeValue, plus the
+	// #id and .class shorthands.
+	inputs := []struct {
+		name string
+		src  string // complete attribute block, surrounding braces included
+	}{
+		{"id", `{#my-id}`},
+		{"class", `{.my-class}`},
+		{"double-quoted", `{k="v"}`},
+		{"unquoted", `{k=v}`},
+		{"integer", `{k=42}`},
+		{"negative-integer", `{k=-7}`},
+		{"float", `{k=3.14}`},
+		{"true", `{k=true}`},
+		{"false", `{k=false}`},
+		{"null", `{k=null}`},
+		{"array", `{k=[1, 2, 3]}`},
+		{"array-strings", `{k=["a", "b"]}`},
+		{"array-mixed", `{k=[1, "x", true]}`},
+		{"multiple-attrs", `{#i .c k=v key="quoted" n=1}`},
+	}
+	for _, in := range inputs {
+		t.Run(in.name, func(t *testing.T) {
+			attrs, ok := ParseAttributes(text.NewReader([]byte(in.src)))
+			if !ok {
+				t.Fatalf("ParseAttributes failed for %q", in.src)
+			}
+			if len(attrs) == 0 {
+				t.Errorf("ParseAttributes returned no attributes for %q", in.src)
+			}
+		})
+	}
+}
+
+func TestParseAttributes_Malformed(t *testing.T) {
+	// Smoke test only: each input must be handled without panicking. The
+	// result is deliberately not asserted because the parser tolerates a
+	// wide range of partial input.
+	malformed := []string{
+		`{=v}`,       // empty key
+		`{k=}`,       // empty value
+		`{k="unclos`, // unclosed double-quoted string
+		`{k='unclos`, // single quote, which does not open a quoted string
+		`{k=[1, 2`,   // unclosed array
+		`{`,          // bare opener
+	}
+	for _, src := range malformed {
+		t.Run(src, func(t *testing.T) {
+			_, _ = ParseAttributes(text.NewReader([]byte(src)))
+		})
+	}
+}
+
+func TestAttributesFind(t *testing.T) {
+ // Build an Attributes via ParseAttributes, then Find each key.
+ r := text.NewReader([]byte(`{#hi .c data-x=1 data-y="quoted"}`))
+ attrs, ok := ParseAttributes(r)
+ if !ok {
+ t.Fatal("ParseAttributes failed")
+ }
+ // Attributes is a typed slice of Attribute; iterate and find.
+ wantKeys := [][]byte{
+ []byte("id"),
+ []byte("class"),
+ []byte("data-x"),
+ []byte("data-y"),
+ }
+ for _, want := range wantKeys {
+ found := false
+ for _, a := range attrs {
+ if bytes.Equal(a.Name, want) {
+ found = true
+ break
+ }
+ }
+ if !found {
+ t.Errorf("missing key %q in attrs %+v", want, attrs)
+ }
+ }
+}
diff --git a/pkg/goldmark/parser/atx_heading.go b/pkg/goldmark/parser/atx_heading.go
new file mode 100644
index 000000000..b5c6df051
--- /dev/null
+++ b/pkg/goldmark/parser/atx_heading.go
@@ -0,0 +1,219 @@
+package parser
+
+import (
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/text"
+ "github.com/yuin/goldmark/util"
+)
+
+// A HeadingConfig struct is a data structure that holds configuration of heading parsers such as atxHeadingParser.
+type HeadingConfig struct {
+	AutoHeadingID bool // when set, atxHeadingParser.Close registers or generates an "id" attribute
+	Attribute     bool // when set, atxHeadingParser.Open looks for a trailing {...} attribute block
+}
+
+// SetOption implements SetOptioner. The option value is ignored: naming a
+// known option is enough to switch the matching flag on.
+func (b *HeadingConfig) SetOption(name OptionName, _ any) {
+	if name == optAutoHeadingID {
+		b.AutoHeadingID = true
+	} else if name == optAttribute {
+		b.Attribute = true
+	}
+}
+
+// A HeadingOption interface sets options for heading parsers.
+type HeadingOption interface {
+	Option                           // embeds the package's general Option interface
+	SetHeadingOption(*HeadingConfig) // applies the option to a heading parser's configuration
+}
+
+// optAutoHeadingID is an option name that enables auto IDs for headings.
+const optAutoHeadingID OptionName = "AutoHeadingID"
+
+type withAutoHeadingID struct { // option value returned by WithAutoHeadingID
+}
+
+func (o *withAutoHeadingID) SetParserOption(c *Config) {
+	c.Options[optAutoHeadingID] = true // record the option by name in the parser configuration's option map
+}
+
+func (o *withAutoHeadingID) SetHeadingOption(p *HeadingConfig) {
+	p.AutoHeadingID = true // direct path, used when the option is passed to NewATXHeadingParser
+}
+
+// WithAutoHeadingID is a functional option that enables custom heading ids and
+// auto generated heading ids.
+func WithAutoHeadingID() HeadingOption {
+	return &withAutoHeadingID{}
+}
+
+type withHeadingAttribute struct {
+	Option // the option built by WithAttribute; embedding supplies the Option half of HeadingOption
+}
+
+func (o *withHeadingAttribute) SetHeadingOption(p *HeadingConfig) {
+	p.Attribute = true // turn on attribute parsing for this heading parser
+}
+
+// WithHeadingAttribute is a functional option that enables custom heading attributes.
+func WithHeadingAttribute() HeadingOption {
+	return &withHeadingAttribute{WithAttribute()} // wraps the option returned by WithAttribute
+}
+
+type atxHeadingParser struct {
+	HeadingConfig // embedded, so the flags are read as b.AutoHeadingID and b.Attribute
+}
+
+// NewATXHeadingParser returns a new BlockParser that can parse ATX headings.
+func NewATXHeadingParser(opts ...HeadingOption) BlockParser {
+	p := &atxHeadingParser{}
+	for _, o := range opts {
+		o.SetHeadingOption(&p.HeadingConfig) // each option mutates the embedded config in place
+	}
+	return p
+}
+
+func (b *atxHeadingParser) Trigger() []byte {
+	return []byte{'#'} // Open below counts a run of '#' as the heading marker
+}
+
+func (b *atxHeadingParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
+	line, segment := reader.PeekLine()
+	pos := pc.BlockOffset()
+	if pos < 0 {
+		return nil, NoChildren
+	}
+	i := pos
+	for ; i < len(line) && line[i] == '#'; i++ { // count the run of '#' that starts at pos
+	}
+	level := i - pos
+	if i == pos || level > 6 { // no '#' at all, or more than six of them
+		return nil, NoChildren
+	}
+	if i == len(line) { // alone '#' (without a new line character)
+		return ast.NewHeading(level), NoChildren
+	}
+	l := util.TrimLeftSpaceLength(line[i:]) // presumably the number of leading space bytes after the '#' run
+	if l == 0 { // nothing trimmed: the '#' run is not followed by a space, so this is not a heading
+		return nil, NoChildren
+	}
+
+	start := min(i+l, len(line)-1) // clamped so start never goes past the last byte of line
+	node := ast.NewHeading(level)
+	hl := text.NewSegment(
+		segment.Start+start-segment.Padding,
+		segment.Start+len(line)-segment.Padding)
+	hl = hl.TrimRightSpace(reader.Source())
+	if hl.Len() == 0 { // nothing is left after trimming: a heading without content
+		reader.AdvanceToEOL()
+		return node, NoChildren
+	}
+
+	if b.Attribute {
+		node.Lines().Append(hl) // park hl on the node: parseLastLineAttributes works on node.Lines()
+		parseLastLineAttributes(node, reader, pc)
+		hl = node.Lines().At(0) // read the segment back; it is shorter if attributes were found
+		node.Lines().Clear()    // the final segment is appended again further down
+	}
+
+	// handle closing sequence of '#' characters
+	line = hl.Value(reader.Source())
+	stop := len(line)
+	if stop == 0 { // empty headings like '##[space]'
+		stop = 0
+	} else {
+		i = stop - 1
+		for ; line[i] == '#' && i > 0; i-- { // walk left over trailing '#' characters
+		}
+		if i == 0 && line[0] == '#' { // empty headings like '### ###'
+			reader.AdvanceToEOL()
+			return node, NoChildren
+		}
+		if i != stop-1 && util.IsSpace(line[i]) { // a trailing '#' run is dropped only when a space precedes it
+			stop = i
+			stop -= util.TrimRightSpaceLength(line[0:stop])
+		}
+	}
+	hl.Stop = hl.Start + stop
+	node.Lines().Append(hl)
+	reader.AdvanceToEOL()
+
+	return node, NoChildren
+}
+
+func (b *atxHeadingParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
+	return Close // unconditional: the heading is never continued onto a following line
+}
+
+func (b *atxHeadingParser) Close(node ast.Node, reader text.Reader, pc Context) {
+	if !b.AutoHeadingID {
+		return
+	}
+	if id, ok := node.AttributeString("id"); ok { // an id is already set: register it
+		pc.IDs().Put(id.([]byte))
+	} else { // no id yet: derive one from the heading text
+		generateAutoHeadingID(node.(*ast.Heading), reader, pc)
+	}
+}
+
+func (b *atxHeadingParser) CanInterruptParagraph() bool {
+	return true // presumably lets a '#' line end an open paragraph; confirm against the BlockParser contract
+}
+
+func (b *atxHeadingParser) CanAcceptIndentedLine() bool {
+	return false // presumably keeps indented lines away from this parser; confirm against the BlockParser contract
+}
+
+func generateAutoHeadingID(node *ast.Heading, reader text.Reader, pc Context) {
+	// The id is generated from the heading's last line; a heading with no
+	// lines hands a nil slice to the generator.
+	var raw []byte
+	if lines := node.Lines(); lines.Len() > 0 {
+		last := lines.At(lines.Len() - 1)
+		raw = last.Value(reader.Source())
+	}
+	node.SetAttribute(attrNameID, pc.IDs().Generate(raw, ast.KindHeading))
+}
+
+func parseLastLineAttributes(node ast.Node, reader text.Reader, _ Context) {
+	lastIndex := node.Lines().Len() - 1
+	if lastIndex < 0 { // empty headings
+		return
+	}
+	lastLine := node.Lines().At(lastIndex)
+	line := lastLine.Value(reader.Source())
+	lr := text.NewReader(line) // a private reader over the last line only
+	var start text.Segment
+	var sl int
+	for {
+		c := lr.Peek()
+		if c == text.EOF || c == '\n' {
+			break
+		}
+		if c == '\\' { // skip the backslash and the punctuation it escapes, so an escaped '{' is not tried
+			lr.Advance(1)
+			if util.IsPunct(lr.Peek()) {
+				lr.Advance(1)
+			}
+			continue
+		}
+		if c == '{' {
+			sl, start = lr.Position() // remember where '{' is so the reader can be rewound
+			attrs, ok := ParseAttributes(lr)
+			if ok {
+				if nl, _ := lr.PeekLine(); nl == nil || util.IsBlank(nl) { // accept only when nothing but blanks follows
+					for _, attr := range attrs {
+						node.SetAttribute(attr.Name, attr.Value)
+					}
+					lastLine.Stop = lastLine.Start + start.Start // end the line where '{' began
+					lastLine = lastLine.TrimRightSpace(reader.Source())
+					node.Lines().Set(lastIndex, lastLine)
+					return
+				}
+			}
+			lr.SetPosition(sl, start) // not a trailing attribute block: rewind to '{', then step past it below
+		}
+		lr.Advance(1)
+	}
+}
diff --git a/pkg/goldmark/parser/auto_link.go b/pkg/goldmark/parser/auto_link.go
new file mode 100644
index 000000000..726a50571
--- /dev/null
+++ b/pkg/goldmark/parser/auto_link.go
@@ -0,0 +1,42 @@
+package parser
+
+import (
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/text"
+ "github.com/yuin/goldmark/util"
+)
+
+type autoLinkParser struct { // has no fields
+}
+
+var defaultAutoLinkParser = &autoLinkParser{} // the single instance handed out by NewAutoLinkParser
+
+// NewAutoLinkParser returns an InlineParser that parses autolinks
+// surrounded by '<' and '>'. Every call returns the same shared instance.
+func NewAutoLinkParser() InlineParser {
+	return defaultAutoLinkParser
+}
+
+func (s *autoLinkParser) Trigger() []byte {
+	return []byte{'<'} // Parse below skips this first byte via line[1:]
+}
+
+func (s *autoLinkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node {
+	line, segment := block.PeekLine()
+	kind := ast.AutoLinkType(ast.AutoLinkEmail)
+	end := util.FindEmailIndex(line[1:]) // search after the first byte of the line
+	if end < 0 { // not an e-mail address; try a URL instead
+		kind = ast.AutoLinkURL
+		end = util.FindURLIndex(line[1:])
+	}
+	if end < 0 {
+		return nil
+	}
+	closer := end + 1 // index, within line, where '>' is required
+	if closer >= len(line) || line[closer] != '>' {
+		return nil
+	}
+	link := ast.NewTextSegment(text.NewSegment(segment.Start+1, segment.Start+closer))
+	block.Advance(closer + 1)
+	return ast.NewAutoLink(kind, link)
+}
diff --git a/pkg/goldmark/parser/blockquote.go b/pkg/goldmark/parser/blockquote.go
new file mode 100644
index 000000000..8faa7ac94
--- /dev/null
+++ b/pkg/goldmark/parser/blockquote.go
@@ -0,0 +1,70 @@
+package parser
+
+import (
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/text"
+ "github.com/yuin/goldmark/util"
+)
+
+type blockquoteParser struct { // has no fields
+}
+
+var defaultBlockquoteParser = &blockquoteParser{} // the single instance handed out by NewBlockquoteParser
+
+// NewBlockquoteParser returns a BlockParser that parses blockquotes.
+// Every call returns the same shared instance.
+func NewBlockquoteParser() BlockParser {
+	return defaultBlockquoteParser
+}
+
+func (b *blockquoteParser) process(reader text.Reader) bool {
+	line, _ := reader.PeekLine()
+	w, pos := util.IndentWidth(line, reader.LineOffset()) // presumably the indent width and the index of the first non-indent byte
+	if w > 3 || pos >= len(line) || line[pos] != '>' { // indent wider than 3, or no '>' marker after it
+		return false
+	}
+	pos++ // step over '>'
+	if pos >= len(line) || line[pos] == '\n' { // the marker is the last thing on the line
+		reader.Advance(pos)
+		return true
+	}
+	reader.Advance(pos)
+	if line[pos] == ' ' || line[pos] == '\t' { // one space or tab directly after '>' is consumed too
+		padding := 0
+		if line[pos] == '\t' {
+			padding = util.TabWidth(reader.LineOffset()) - 1 // keep the tab's width minus one as padding
+		}
+		reader.AdvanceAndSetPadding(1, padding)
+	}
+	return true
+}
+
+func (b *blockquoteParser) Trigger() []byte {
+	return []byte{'>'} // process checks for this same marker byte
+}
+
+func (b *blockquoteParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
+	if b.process(reader) { // the '>' marker was found and consumed, so a blockquote starts here
+		return ast.NewBlockquote(), HasChildren
+	}
+	return nil, NoChildren
+}
+
+func (b *blockquoteParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
+	if b.process(reader) { // this line carries its own '>' marker as well
+		return Continue | HasChildren
+	}
+	return Close
+}
+
+func (b *blockquoteParser) Close(node ast.Node, reader text.Reader, pc Context) {
+	// nothing to do
+}
+
+func (b *blockquoteParser) CanInterruptParagraph() bool {
+	return true
+}
+
+func (b *blockquoteParser) CanAcceptIndentedLine() bool {
+	return false
+}
diff --git a/pkg/goldmark/parser/code_block.go b/pkg/goldmark/parser/code_block.go
new file mode 100644
index 000000000..5a2401695
--- /dev/null
+++ b/pkg/goldmark/parser/code_block.go
@@ -0,0 +1,102 @@
+package parser
+
+import (
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/text"
+ "github.com/yuin/goldmark/util"
+)
+
+type codeBlockParser struct { // has no fields
+}
+
+// defaultCodeBlockParser is the shared BlockParser instance that parses indented code blocks.
+var defaultCodeBlockParser = &codeBlockParser{}
+
+// NewCodeBlockParser returns a BlockParser that parses code blocks.
+// Every call returns the same shared instance.
+func NewCodeBlockParser() BlockParser {
+	return defaultCodeBlockParser
+}
+
+func (b *codeBlockParser) Trigger() []byte {
+	return nil // no trigger bytes are declared for this parser
+}
+
+func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
+	line, segment := reader.PeekLine()
+	pos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
+	if pos < 0 || util.IsBlank(line) { // presumably: the line lacks an indent of width 4, or it is blank
+		return nil, NoChildren
+	}
+	node := ast.NewCodeBlock()
+	reader.AdvanceAndSetPadding(pos, padding) // step past the indentation
+	_, segment = reader.PeekLine()            // re-read the segment from the new position
+	// if code block line starts with a tab, keep a tab as it is.
+	if segment.Padding != 0 {
+		preserveLeadingTabInCodeBlock(&segment, reader, 0)
+	}
+	segment.ForceNewline = true
+	node.Lines().Append(segment)
+	reader.AdvanceToEOL()
+	return node, NoChildren
+
+}
+
+func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
+	line, segment := reader.PeekLine()
+	if util.IsBlank(line) { // blank lines are kept here; Close removes them if they end up trailing
+		node.Lines().Append(segment.TrimLeftSpaceWidth(4, reader.Source()))
+		return Continue | NoChildren
+	}
+	pos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
+	if pos < 0 { // presumably a non-blank line without an indent of width 4: the block ends
+		return Close
+	}
+	reader.AdvanceAndSetPadding(pos, padding) // step past the indentation
+	_, segment = reader.PeekLine()            // re-read the segment from the new position
+
+	// if code block line starts with a tab, keep a tab as it is.
+	if segment.Padding != 0 {
+		preserveLeadingTabInCodeBlock(&segment, reader, 0)
+	}
+
+	segment.ForceNewline = true
+	node.Lines().Append(segment)
+	reader.AdvanceToEOL()
+	return Continue | NoChildren
+}
+
+func (b *codeBlockParser) Close(node ast.Node, reader text.Reader, pc Context) {
+	// Drop blank lines from the end of the block: shrink the kept prefix
+	// until its last line is non-blank (or nothing is left).
+	lines := node.Lines()
+	source := reader.Source()
+	keep := lines.Len()
+	for keep > 0 {
+		last := lines.At(keep - 1)
+		if !util.IsBlank(last.Value(source)) {
+			break
+		}
+		keep--
+	}
+	lines.SetSliced(0, keep)
+}
+
+func (b *codeBlockParser) CanInterruptParagraph() bool {
+	return false // presumably keeps an indented line from ending an open paragraph; confirm against the BlockParser contract
+}
+
+func (b *codeBlockParser) CanAcceptIndentedLine() bool {
+	return true // this parser works on lines indented by 4, see Open
+}
+
+func preserveLeadingTabInCodeBlock(segment *text.Segment, reader text.Reader, indent int) {
+	offsetWithPadding := reader.LineOffset() + indent // line offset at the current position, plus indent
+	sl, ss := reader.Position() // saved so the reader can be restored at the end
+	reader.SetPosition(sl, text.NewSegment(ss.Start-1, ss.Stop)) // probe: move the reader back by one byte
+	if offsetWithPadding == reader.LineOffset() { // same offset one byte earlier: take that byte instead of padding
+		segment.Padding = 0
+		segment.Start--
+	}
+	reader.SetPosition(sl, ss) // undo the probe
+}
diff --git a/pkg/goldmark/parser/code_span.go b/pkg/goldmark/parser/code_span.go
new file mode 100644
index 000000000..a74b09bc4
--- /dev/null
+++ b/pkg/goldmark/parser/code_span.go
@@ -0,0 +1,84 @@
+package parser
+
+import (
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/text"
+)
+
+type codeSpanParser struct { // has no fields
+}
+
+var defaultCodeSpanParser = &codeSpanParser{} // the single instance handed out by NewCodeSpanParser
+
+// NewCodeSpanParser returns an InlineParser that parses inline code
+// surrounded by '`'. Every call returns the same shared instance.
+func NewCodeSpanParser() InlineParser {
+	return defaultCodeSpanParser
+}
+
+func (s *codeSpanParser) Trigger() []byte {
+	return []byte{'`'} // Parse counts a run of this byte as the opener
+}
+
+func (s *codeSpanParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node {
+	line, startSegment := block.PeekLine()
+	opener := 0
+	for ; opener < len(line) && line[opener] == '`'; opener++ { // length of the opening backtick run
+	}
+	block.Advance(opener)
+	l, pos := block.Position() // remembered so the reader can be rewound if no closer exists
+	node := ast.NewCodeSpan()
+	for {
+		line, segment := block.PeekLine()
+		if line == nil { // ran out of lines without a closer: return the opening backticks as text
+			block.SetPosition(l, pos)
+			return ast.NewTextSegment(startSegment.WithStop(startSegment.Start + opener))
+		}
+		for i := 0; i < len(line); i++ {
+			c := line[i]
+			if c == '`' {
+				oldi := i
+				for ; i < len(line) && line[i] == '`'; i++ { // consume the whole backtick run
+				}
+				closure := i - oldi // length of this backtick run
+				if closure == opener && (i >= len(line) || line[i] != '`') { // only a run as long as the opener closes the span
+					segment = segment.WithStop(segment.Start + i - closure)
+					if !segment.IsEmpty() {
+						node.AppendChild(node, ast.NewRawTextSegment(segment))
+					}
+					block.Advance(i)
+					goto end
+				}
+			}
+		}
+		node.AppendChild(node, ast.NewRawTextSegment(segment)) // no closer on this line: keep all of it and go on
+		block.AdvanceLine()
+	}
+end:
+	if !node.IsBlank(block.Source()) {
+		// strip one leading and one trailing space/newline, but only when both ends have one
+		segment := node.FirstChild().(*ast.Text).Segment
+		shouldTrimmed := true
+		if !(!segment.IsEmpty() && isSpaceOrNewline(block.Source()[segment.Start])) {
+			shouldTrimmed = false
+		}
+		segment = node.LastChild().(*ast.Text).Segment
+		if !(!segment.IsEmpty() && isSpaceOrNewline(block.Source()[segment.Stop-1])) {
+			shouldTrimmed = false
+		}
+		if shouldTrimmed {
+			t := node.FirstChild().(*ast.Text)
+			segment := t.Segment
+			t.Segment = segment.WithStart(segment.Start + 1)
+			t = node.LastChild().(*ast.Text)
+			segment = node.LastChild().(*ast.Text).Segment
+			t.Segment = segment.WithStop(segment.Stop - 1)
+		}
+
+	}
+	return node
+}
+
+func isSpaceOrNewline(c byte) bool {
+	return c == ' ' || c == '\n' // only these two bytes count; tabs and '\r' do not
+}
diff --git a/pkg/goldmark/parser/context_test.go b/pkg/goldmark/parser/context_test.go
new file mode 100644
index 000000000..d90531e45
--- /dev/null
+++ b/pkg/goldmark/parser/context_test.go
@@ -0,0 +1,168 @@
+package parser_test
+
+// Coverage for the parser.Context surface that mdsmith's tests
+// otherwise wouldn't touch: ID generation, IDs accessor,
+// ComputeIfAbsent, References list, IsInLinkLabel, WithIDs
+// option, plus the WithEscapedSpace and WithOption parser
+// options.
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/text"
+)
+
+func TestContext_IDs_GenerateAndPut(t *testing.T) {
+	ctx := parser.NewContext()
+	ids := ctx.IDs()
+	if ids == nil {
+		t.Fatal("Context.IDs() must return a non-nil IDs")
+	}
+	// The same label requested twice has to yield two different slugs.
+	first := string(ids.Generate([]byte("Heading"), ast.KindHeading))
+	second := string(ids.Generate([]byte("Heading"), ast.KindHeading))
+	if first == "" || second == "" {
+		t.Fatal("Generate returned empty string")
+	}
+	if first == second {
+		t.Errorf("two Generate calls with same input must disambiguate: %q == %q", first, second)
+	}
+	// A slug registered through Put must not be handed out by Generate.
+	ids.Put([]byte("used"))
+	reused := string(ids.Generate([]byte("Used"), ast.KindHeading))
+	if reused == "used" {
+		t.Errorf("Generate should not return a pre-claimed slug, got %q", reused)
+	}
+
+	// Drive Generate branches:
+	// - multi-byte UTF-8 char (l != 1 branch -> skip)
+	// - all-punctuation -> empty result -> "heading" / "id" defaults
+	// - non-heading kind for empty result -> "id" default
+	_ = string(ids.Generate([]byte("日本語"), ast.KindHeading))  // multi-byte chars
+	_ = string(ids.Generate([]byte("!!!"), ast.KindHeading))   // all punct -> empty -> "heading"
+	_ = string(ids.Generate([]byte("!!!"), ast.KindParagraph)) // all punct -> empty -> "id"
+}
+
+func TestContext_WithIDs(t *testing.T) {
+	// Install a recording IDs implementation through the WithIDs option.
+	rec := &recordingIDs{}
+	ctx := parser.NewContext(parser.WithIDs(rec))
+	slug := ctx.IDs().Generate([]byte("X"), ast.KindHeading)
+	if string(slug) != "custom-X" {
+		t.Errorf("WithIDs should install the custom IDs; got %q", slug)
+	}
+	if rec.generateCalls != 1 {
+		t.Errorf("Generate was not routed to custom IDs (calls=%d)", rec.generateCalls)
+	}
+}
+
+// computeIfAbsentKey is allocated at package init time so it lives
+// in the slice-backed store of any Context created in the tests
+// below. ContextKeyMax grows on each NewContextKey call but the
+// store is sized at NewContext time, so this must run first.
+var computeIfAbsentKey = parser.NewContextKey()
+
+func TestContext_ComputeIfAbsent(t *testing.T) {
+	ctx := parser.NewContext()
+	// The first supplier runs and its value is stored; the second must be ignored.
+	first := ctx.ComputeIfAbsent(computeIfAbsentKey, func() any { return 42 })
+	second := ctx.ComputeIfAbsent(computeIfAbsentKey, func() any { return 99 })
+	if first != 42 {
+		t.Errorf("first ComputeIfAbsent = %v, want 42", first)
+	}
+	if second != 42 {
+		t.Errorf("second ComputeIfAbsent must return cached 42, got %v", second)
+	}
+}
+
+func TestContext_String_IsInLinkLabel(t *testing.T) {
+	// Context.String pretty-prints the references map. Drive it
+	// by parsing a doc with references then calling Stringer.
+	src := "[a]: /a\n[b]: /b\nbody\n"
+	p := parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+		parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...),
+	)
+	ctx := parser.NewContext()
+	p.Parse(text.NewReader([]byte(src)), parser.WithContext(ctx))
+	if s, ok := ctx.(fmt.Stringer); ok { // silently skipped when the context is not a fmt.Stringer
+		got := s.String()
+		if !strings.Contains(got, "a") { // loose check: any "a" in the output satisfies it
+			t.Errorf("Context.String should mention 'a': %q", got)
+		}
+	}
+	// IsInLinkLabel returns true while the inline parser is in
+	// the middle of consuming a link label. Outside the parse it
+	// returns false (no state key set).
+	if ctx.IsInLinkLabel() {
+		t.Error("IsInLinkLabel should be false outside link-label processing")
+	}
+}
+
+func TestContext_References(t *testing.T) {
+	// Parse a document that defines two link references ([a] and [b])
+	// and check that References() reports at least that many.
+	src := `[a]: /a
+[b]: /b
+[a] and [b]
+`
+	p := parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+		parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...),
+	)
+	ctx := parser.NewContext()
+	p.Parse(text.NewReader([]byte(src)), parser.WithContext(ctx))
+	refs := ctx.References()
+	if len(refs) < 2 { // only the count is checked, not the labels or destinations
+		t.Errorf("References() = %d, want >= 2", len(refs))
+	}
+}
+
+func TestParser_WithEscapedSpace(t *testing.T) {
+	p := parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+		parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...),
+		parser.WithEscapedSpace(), // the option under test
+	)
+	root := p.Parse(text.NewReader([]byte(`a\ b`+"\n")), parser.WithContext(parser.NewContext()))
+	if root == nil { // smoke test: only a non-nil document is required
+		t.Fatal("Parse returned nil root")
+	}
+}
+
+func TestParser_WithOption(t *testing.T) {
+	// WithOption sets an arbitrary option by name.
+	p := parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+		parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...),
+		parser.WithOption(parser.OptionName("AutoHeadingID"), true), // option name spelled out as a string
+	)
+	root := p.Parse(text.NewReader([]byte("# Heading\n")), parser.WithContext(parser.NewContext()))
+	if root == nil { // smoke test: the heading's id attribute is not inspected
+		t.Fatal("Parse returned nil root")
+	}
+}
+
+// recordingIDs is a custom IDs implementation that records call
+// counts and returns deterministic slugs prefixed with "custom-".
+type recordingIDs struct {
+	generateCalls int // number of Generate calls so far
+	putCalls      int // number of Put calls so far
+}
+
+func (r *recordingIDs) Generate(value []byte, kind ast.NodeKind) []byte {
+	r.generateCalls++
+	return append([]byte("custom-"), value...) // kind is ignored; the slug is "custom-" plus the input
+}
+
+func (r *recordingIDs) Put(value []byte) {
+	r.putCalls++ // the value itself is not stored
+}
diff --git a/pkg/goldmark/parser/corpus_test.go b/pkg/goldmark/parser/corpus_test.go
new file mode 100644
index 000000000..5eea1b285
--- /dev/null
+++ b/pkg/goldmark/parser/corpus_test.go
@@ -0,0 +1,180 @@
+package parser_test
+
+// Parser corpus tests: a curated set of markdown snippets that
+// exercise every block parser and every inline parser. Each
+// snippet is parsed and the resulting AST is walked to assert a
+// minimum expected node type is present. The goal is broad
+// parser coverage; the CommonMark spec's full corpus was removed
+// along with the upstream testutil-driven tests, and these
+// snippets restore the parser-coverage breadth without bringing
+// the spec corpus back.
+
+import (
+ "testing"
+
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/text"
+)
+
+func walkKinds(root ast.Node) map[ast.NodeKind]int {
+	counts := make(map[ast.NodeKind]int)
+	_ = ast.Walk(root, func(node ast.Node, entering bool) (ast.WalkStatus, error) {
+		if entering { // tally each node once, on the way in
+			counts[node.Kind()]++
+		}
+		return ast.WalkContinue, nil
+	})
+	return counts
+}
+
+func TestParser_BlockCorpus(t *testing.T) {
+ cases := []struct {
+ name string
+ src string
+ want ast.NodeKind
+ }{
+ // Atx and Setext headings.
+ {"atx-h1", "# H1\n", ast.KindHeading},
+ {"atx-h2", "## H2\n", ast.KindHeading},
+ {"atx-h6", "###### H6\n", ast.KindHeading},
+ {"atx-trailing-hash", "## H2 ##\n", ast.KindHeading},
+ {"atx-blank-content", "# \n", ast.KindHeading},
+ {"setext-h1", "Title\n=====\n", ast.KindHeading},
+ {"setext-h2", "Subtitle\n--------\n", ast.KindHeading},
+ // Thematic break in three glyph styles.
+ {"hr-dashes", "---\n", ast.KindThematicBreak},
+ {"hr-stars", "***\n", ast.KindThematicBreak},
+ {"hr-underscores", "___\n", ast.KindThematicBreak},
+ // Code blocks: indented and fenced (both fence styles).
+ {"indented-code", " code line\n", ast.KindCodeBlock},
+ {"fenced-backtick", "```\ncode\n```\n", ast.KindFencedCodeBlock},
+ {"fenced-tilde", "~~~\ncode\n~~~\n", ast.KindFencedCodeBlock},
+ {"fenced-info", "```go\nfn()\n```\n", ast.KindFencedCodeBlock},
+ // Blockquote and nested blockquote.
+ {"blockquote", "> quoted\n", ast.KindBlockquote},
+ {"blockquote-nested", "> > deeply\n", ast.KindBlockquote},
+ // Lists.
+ {"ul-dash", "- one\n- two\n", ast.KindList},
+ {"ul-star", "* one\n* two\n", ast.KindList},
+ {"ul-plus", "+ one\n+ two\n", ast.KindList},
+ {"ol-paren", "1) one\n2) two\n", ast.KindList},
+ {"ol-dot", "1. one\n2. two\n", ast.KindList},
+ {"list-loose", "- one\n\n- two\n", ast.KindList},
+ // HTML block (type 1: \n", ast.KindHTMLBlock},
+ {"html-block-pre", "
x
\n", ast.KindHTMLBlock},
+ // Link reference definition.
+ {"linkref", "[lab]: /url\n\n[lab]\n", ast.KindLinkReferenceDefinition},
+ }
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ p := parser.NewParser(
+ parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+ parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+ parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...),
+ )
+ root := p.Parse(text.NewReader([]byte(tc.src)), parser.WithContext(parser.NewContext()))
+ kinds := walkKinds(root)
+ if kinds[tc.want] == 0 {
+ t.Errorf("AST for %q missing %v\nkinds: %v", tc.src, tc.want, kinds)
+ }
+ })
+ }
+}
+
+func TestParser_InlineCorpus(t *testing.T) {
+	cases := []struct {
+		name string
+		src  string
+		want ast.NodeKind // a node of this kind must occur somewhere in the parsed tree
+	}{
+		// Emphasis variants.
+		{"emph-star", "this is *emphasised* text\n", ast.KindEmphasis},
+		{"emph-under", "this is _emphasised_ text\n", ast.KindEmphasis},
+		{"strong-star", "this is **strong** text\n", ast.KindEmphasis},
+		{"strong-under", "this is __strong__ text\n", ast.KindEmphasis},
+		// Code span (1, 2, and 3 backticks).
+		{"code-1", "use `code` here\n", ast.KindCodeSpan},
+		{"code-2", "use ``co`de`` here\n", ast.KindCodeSpan},
+		{"code-3", "use ```co`d`e``` here\n", ast.KindCodeSpan},
+		// Links and autolinks.
+		{"link", "see [text](/url)\n", ast.KindLink},
+		{"link-with-title", "see [text](/url \"title\")\n", ast.KindLink},
+		{"autolink-url", "<https://example.com>\n", ast.KindAutoLink},
+		{"autolink-email", "<user@example.com>\n", ast.KindAutoLink},
+		// Images.
+		{"image", "see ![alt](/img.png)\n", ast.KindImage},
+		{"image-titled", "see ![alt](/img.png \"title\")\n", ast.KindImage},
+		// Raw HTML.
+		{"raw-html-tag", "an inline <span>tag</span>\n", ast.KindRawHTML},
+		// Hard line break.
+		{"hardbreak-backslash", "first \nsecond\n", ast.KindParagraph},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			p := parser.NewParser(
+				parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+				parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+				parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...),
+			)
+			root := p.Parse(text.NewReader([]byte(tc.src)), parser.WithContext(parser.NewContext()))
+			kinds := walkKinds(root)
+			if kinds[tc.want] == 0 {
+				t.Errorf("AST for %q missing %v\nkinds: %v", tc.src, tc.want, kinds)
+			}
+		})
+	}
+}
+
+func TestParser_AttributeSyntax(t *testing.T) {
+	// {#id .class key=value} after a heading or image lifts the
+	// attribute parser to non-zero coverage.
+	p := parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+		parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...),
+		parser.WithHeadingAttribute(),
+	)
+	src := `# Heading {#my-id .my-class data-x=1 data-y="quoted" data-z='single'}
+
+paragraph with image ![alt](/img.png){#i .c key=val}
+`
+	root := p.Parse(text.NewReader([]byte(src)), parser.WithContext(parser.NewContext()))
+	if root == nil {
+		t.Fatal("Parse returned nil root")
+	}
+	var hadHeading bool // only the heading's presence is asserted, not its attributes
+	_ = ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
+		if entering {
+			if _, ok := n.(*ast.Heading); ok {
+				hadHeading = true
+			}
+		}
+		return ast.WalkContinue, nil
+	})
+	if !hadHeading {
+		t.Error("did not find heading node")
+	}
+}
+
+func TestParser_EscapedAndEntities(t *testing.T) {
+	// Backslash escapes, named entities, hex/decimal numeric
+	// entities — drives util.ResolveNumericReferences and
+	// ResolveEntityNames.
+	p := parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+		parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...),
+	)
+	src := []byte(`\* not emphasised
+&amp; &#65; &#x41; &#1234;
+`)
+	root := p.Parse(text.NewReader(src), parser.WithContext(parser.NewContext()))
+	if root == nil {
+		t.Fatal("Parse returned nil root")
+	}
+	// Just walking the result is enough; the entity functions fire
+	// during the walk inside the inline parsers.
+	_ = walkKinds(root)
+}
diff --git a/pkg/goldmark/parser/custom_test.go b/pkg/goldmark/parser/custom_test.go
new file mode 100644
index 000000000..cff7d66a6
--- /dev/null
+++ b/pkg/goldmark/parser/custom_test.go
@@ -0,0 +1,185 @@
+package parser_test
+
+// Cover the SetOptioner-cast branches in addInlineParser,
+// addParagraphTransformer, and addASTTransformer by registering
+// custom parsers/transformers that implement parser.SetOptioner
+// AND threading a non-empty options map through them.
+
+import (
+ "testing"
+
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/text"
+ "github.com/yuin/goldmark/util"
+)
+
+const customOptName parser.OptionName = "CustomOpt" // the only option name the recording types below count
+
+// recordingInlineParser implements parser.InlineParser AND
+// parser.SetOptioner so addInlineParser's SetOptioner branch
+// fires when an options map carrying our key is threaded in.
+type recordingInlineParser struct {
+	setOptionCalls int // number of SetOption calls seen for customOptName
+}
+
+func (p *recordingInlineParser) Trigger() []byte { return []byte{'^'} }
+func (p *recordingInlineParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
+	return nil // never produces a node
+}
+func (p *recordingInlineParser) SetOption(name parser.OptionName, _ any) {
+	if name == customOptName { // other option names are not counted
+		p.setOptionCalls++
+	}
+}
+
+// recordingParagraphTransformer implements ParagraphTransformer +
+// SetOptioner so addParagraphTransformer routes through both.
+type recordingParagraphTransformer struct {
+	setOptionCalls int // number of SetOption calls seen for customOptName
+}
+
+func (t *recordingParagraphTransformer) Transform(node *ast.Paragraph, reader text.Reader, pc parser.Context) {
+}
+func (t *recordingParagraphTransformer) SetOption(name parser.OptionName, _ any) {
+	if name == customOptName { // other option names are not counted
+		t.setOptionCalls++
+	}
+}
+
+// recordingBlockParser implements parser.BlockParser + parser.SetOptioner.
+type recordingBlockParser struct {
+	setOptionCalls int // number of SetOption calls seen for customOptName
+}
+
+func (b *recordingBlockParser) Trigger() []byte { return nil } // free block parser path
+func (b *recordingBlockParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) {
+	return nil, parser.NoChildren // never opens a block
+}
+func (b *recordingBlockParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State {
+	return parser.Close
+}
+func (b *recordingBlockParser) Close(node ast.Node, reader text.Reader, pc parser.Context) {}
+func (b *recordingBlockParser) CanInterruptParagraph() bool                                { return false }
+func (b *recordingBlockParser) CanAcceptIndentedLine() bool                                { return false }
+func (b *recordingBlockParser) SetOption(name parser.OptionName, _ any) {
+	if name == customOptName { // other option names are not counted
+		b.setOptionCalls++
+	}
+}
+
+// badValue is something that doesn't implement BlockParser /
+// InlineParser / ParagraphTransformer / ASTTransformer. Used to
+// drive the panic branches in addBlockParser etc.
+type badValue struct{}
+
+// recordingASTTransformer implements ASTTransformer + SetOptioner.
+type recordingASTTransformer struct {
+	setOptionCalls int // number of SetOption calls seen for customOptName
+}
+
+func (t *recordingASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
+}
+func (t *recordingASTTransformer) SetOption(name parser.OptionName, _ any) {
+	if name == customOptName { // other option names are not counted
+		t.setOptionCalls++
+	}
+}
+
+func TestParser_AddBlockParser_Panics(t *testing.T) {
+	defer func() {
+		if r := recover(); r == nil { // nothing to recover means no panic happened: that is the failure
+			t.Error("expected panic on non-BlockParser value")
+		}
+	}()
+	parser.NewParser(
+		parser.WithBlockParsers(util.Prioritized(&badValue{}, 999)), // badValue is not a BlockParser
+	).Parse(text.NewReader([]byte("x\n")), parser.WithContext(parser.NewContext()))
+}
+
+func TestParser_AddInlineParser_Panics(t *testing.T) {
+	defer func() {
+		if r := recover(); r == nil { // nothing to recover means no panic happened: that is the failure
+			t.Error("expected panic on non-InlineParser value")
+		}
+	}()
+	parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(util.Prioritized(&badValue{}, 999)), // badValue is not an InlineParser
+	).Parse(text.NewReader([]byte("x\n")), parser.WithContext(parser.NewContext()))
+}
+
+func TestParser_AddParagraphTransformer_Panics(t *testing.T) {
+	defer func() {
+		if r := recover(); r == nil { // nothing to recover means no panic happened: that is the failure
+			t.Error("expected panic on non-ParagraphTransformer value")
+		}
+	}()
+	parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+		parser.WithParagraphTransformers(util.Prioritized(&badValue{}, 999)), // badValue is not a ParagraphTransformer
+	).Parse(text.NewReader([]byte("x\n")), parser.WithContext(parser.NewContext()))
+}
+
+func TestParser_AddASTTransformer_Panics(t *testing.T) {
+	defer func() {
+		if r := recover(); r == nil { // nothing to recover means no panic happened: that is the failure
+			t.Error("expected panic on non-ASTTransformer value")
+		}
+	}()
+	parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(parser.DefaultInlineParsers()...),
+		parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...),
+		parser.WithASTTransformers(util.Prioritized(&badValue{}, 999)), // badValue is not an ASTTransformer
+	).Parse(text.NewReader([]byte("x\n")), parser.WithContext(parser.NewContext()))
+}
+
+func TestParser_RegisterCustomSetOptioners(t *testing.T) {
+	inline := &recordingInlineParser{}
+	para := &recordingParagraphTransformer{}
+	astT := &recordingASTTransformer{}
+	parser.NewParser(
+		parser.WithBlockParsers(parser.DefaultBlockParsers()...),
+		parser.WithInlineParsers(
+			append(parser.DefaultInlineParsers(),
+				util.Prioritized(inline, 999))...),
+		parser.WithParagraphTransformers(
+			append(parser.DefaultParagraphTransformers(),
+				util.Prioritized(para, 999))...),
+		parser.WithASTTransformers(util.Prioritized(astT, 999)),
+		parser.WithOption(customOptName, "value"),
+	)
+	// The parser built above is discarded; this first half only
+	// exercises construction with recording implementations of
+	// InlineParser, ParagraphTransformer and ASTTransformer plus
+	// a custom option. The counters are read below but
+	// deliberately not asserted: per the original author's note,
+	// whether SetOption actually fires depends on how the option
+	// map is plumbed to each registered component, and this test
+	// does not pin that down.
+	_ = inline.setOptionCalls
+	_ = para.setOptionCalls
+	_ = astT.setOptionCalls
+
+	// Second half: build the same kind of parser again, this time
+	// with a recording block parser as well, and run Parse so the
+	// registered components are actually used on a document.
+	p := parser.NewParser(
+		parser.WithBlockParsers(append(parser.DefaultBlockParsers(),
+			util.Prioritized(&recordingBlockParser{}, 999))...),
+		parser.WithInlineParsers(
+			append(parser.DefaultInlineParsers(),
+				util.Prioritized(&recordingInlineParser{}, 999))...),
+		parser.WithParagraphTransformers(
+			append(parser.DefaultParagraphTransformers(),
+				util.Prioritized(&recordingParagraphTransformer{}, 999))...),
+		parser.WithASTTransformers(util.Prioritized(&recordingASTTransformer{}, 999)),
+		parser.WithOption(customOptName, "value"),
+	)
+	root := p.Parse(text.NewReader([]byte("# A\n\nparagraph\n")), parser.WithContext(parser.NewContext()))
+	if root == nil { // smoke test: only a non-nil document is required
+		t.Fatal("Parse returned nil")
+	}
+}
diff --git a/pkg/goldmark/parser/delimiter.go b/pkg/goldmark/parser/delimiter.go
new file mode 100644
index 000000000..be58c2b84
--- /dev/null
+++ b/pkg/goldmark/parser/delimiter.go
@@ -0,0 +1,239 @@
+package parser
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/text"
+ "github.com/yuin/goldmark/util"
+)
+
+// A DelimiterProcessor interface decides which bytes are delimiters, which
+// opener/closer pairs match, and which node a matched pair produces.
+type DelimiterProcessor interface {
+ // IsDelimiter returns true if the given character is a delimiter, otherwise false.
+ IsDelimiter(byte) bool
+
+ // CanOpenCloser returns true if the given opener can be paired with the given closer, otherwise false.
+ CanOpenCloser(opener, closer *Delimiter) bool
+
+ // OnMatch is called when a matching opener/closer pair has been found; consumes is the
+ // number of characters taken from each delimiter. It returns the new Node for that pair.
+ OnMatch(consumes int) ast.Node
+}
+
+// A Delimiter struct represents a run of a delimiter character, like '*', in the Markdown text.
+type Delimiter struct {
+ ast.BaseInline
+
+ Segment text.Segment // source range whose bytes Text returns; its stop moves as characters are consumed
+
+ // CanOpen is set true if this delimiter can open a span for a new node.
+ // See https://spec.commonmark.org/0.30/#can-open-emphasis for details.
+ CanOpen bool
+
+ // CanClose is set true if this delimiter can close a span for a new node.
+ // See https://spec.commonmark.org/0.30/#can-close-emphasis for details.
+ CanClose bool
+
+ // Length is the remaining length of this delimiter; ConsumeCharacters decreases it.
+ Length int
+
+ // OriginalLength is the length this delimiter was created with.
+ OriginalLength int
+
+ // Char is the character this delimiter is made of.
+ Char byte
+
+ // PreviousDelimiter is the previous delimiter in the delimiter list, or nil.
+ PreviousDelimiter *Delimiter
+
+ // NextDelimiter is the next delimiter in the delimiter list, or nil.
+ NextDelimiter *Delimiter
+
+ // Processor is the DelimiterProcessor associated with this delimiter.
+ Processor DelimiterProcessor
+}
+
+// Inline implements Inline.Inline; it is an empty method with no behavior of its own.
+func (d *Delimiter) Inline() {}
+
+// Dump implements Node.Dump by printing this delimiter's text to standard output, indented once per level.
+func (d *Delimiter) Dump(source []byte, level int) {
+ fmt.Printf("%sDelimiter: \"%s\"\n", strings.Repeat(" ", level), string(d.Text(source)))
+}
+
+var kindDelimiter = ast.NewNodeKind("Delimiter") // the single NodeKind returned by every Delimiter
+
+// Kind implements Node.Kind; it always returns kindDelimiter.
+func (d *Delimiter) Kind() ast.NodeKind {
+ return kindDelimiter
+}
+
+// Text implements Node.Text; it returns the value of d.Segment taken from source.
+func (d *Delimiter) Text(source []byte) []byte {
+ return d.Segment.Value(source)
+}
+
+// ConsumeCharacters uses up n characters of this delimiter: Length drops by n and Segment's stop becomes Start+Length.
+func (d *Delimiter) ConsumeCharacters(n int) {
+ d.Length -= n
+ d.Segment = d.Segment.WithStop(d.Segment.Start + d.Length) // uses the already-reduced Length
+}
+
+// CalcComsumption reports how many characters (0, 1 or 2) this opener and the
+// given closer should each give up in order to open a new span.
+func (d *Delimiter) CalcComsumption(closer *Delimiter) int {
+	switch total := d.OriginalLength + closer.OriginalLength; {
+	case (d.CanClose || closer.CanOpen) && total%3 == 0 && closer.OriginalLength%3 != 0:
+		return 0 // this opener/closer combination must not be paired
+	case d.Length >= 2 && closer.Length >= 2:
+		return 2
+	}
+	return 1
+}
+
+// NewDelimiter returns a new Delimiter node for a run of length characters
+// equal to char, handled by processor.
+//
+// OriginalLength starts out equal to Length, and the node is not linked to
+// any previous or next delimiter.
+func NewDelimiter(canOpen, canClose bool, length int, char byte, processor DelimiterProcessor) *Delimiter {
+	d := new(Delimiter)
+	d.CanOpen = canOpen
+	d.CanClose = canClose
+	d.Length = length
+	d.OriginalLength = length
+	d.Char = char
+	d.Processor = processor
+	return d
+}
+
+// ScanDelimiter scans a delimiter by given DelimiterProcessor.
+func ScanDelimiter(line []byte, before rune, minimum int, processor DelimiterProcessor) *Delimiter {
+ i := 0
+ c := line[i]
+ j := i
+ if !processor.IsDelimiter(c) {
+ return nil
+ }
+ for ; j < len(line) && c == line[j]; j++ {
+ }
+ if (j - i) >= minimum {
+ after := rune(' ')
+ if j != len(line) {
+ after = util.ToRune(line, j)
+ }
+
+ var canOpen, canClose bool
+ beforeIsPunctuation := util.IsPunctRune(before)
+ beforeIsWhitespace := util.IsSpaceRune(before)
+ afterIsPunctuation := util.IsPunctRune(after)
+ afterIsWhitespace := util.IsSpaceRune(after)
+
+ isLeft := !afterIsWhitespace &&
+ (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation)
+ isRight := !beforeIsWhitespace &&
+ (!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation)
+
+ if line[i] == '_' {
+ canOpen = isLeft && (!isRight || beforeIsPunctuation)
+ canClose = isRight && (!isLeft || afterIsPunctuation)
+ } else {
+ canOpen = isLeft
+ canClose = isRight
+ }
+ return NewDelimiter(canOpen, canClose, j-i, c, processor)
+ }
+ return nil
+}
+
+// ProcessDelimiters pairs up openers and closers in the context's delimiter
+// list and wraps the nodes between each pair in the node returned by OnMatch.
+// Only delimiters after bottom are considered; a nil bottom means the whole list.
+// If you implement an inline parser that can have other inline nodes as
+// children, you should call this function when the nesting span has closed.
+func ProcessDelimiters(bottom ast.Node, pc Context) {
+ lastDelimiter := pc.LastDelimiter()
+ if lastDelimiter == nil {
+ return
+ }
+ var closer *Delimiter
+ if bottom != nil {
+ if bottom != lastDelimiter {
+ for c := lastDelimiter.PreviousSibling(); c != nil && c != bottom; { // walk back; closer ends as the earliest Delimiter after bottom
+ if d, ok := c.(*Delimiter); ok {
+ closer = d
+ }
+ c = c.PreviousSibling()
+ }
+ }
+ } else {
+ closer = pc.FirstDelimiter()
+ }
+ if closer == nil {
+ pc.ClearDelimiters(bottom)
+ return
+ }
+ for closer != nil {
+ if !closer.CanClose { // not a closer: move on to the next delimiter
+ closer = closer.NextDelimiter
+ continue
+ }
+ consume := 0
+ found := false
+ maybeOpener := false
+ var opener *Delimiter
+ for opener = closer.PreviousDelimiter; opener != nil && opener != bottom; opener = opener.PreviousDelimiter { // search backwards for an opener
+ if opener.CanOpen && opener.Processor.CanOpenCloser(opener, closer) {
+ maybeOpener = true
+ consume = opener.CalcComsumption(closer)
+ if consume > 0 {
+ found = true
+ break
+ }
+ }
+ }
+ if !found {
+ next := closer.NextDelimiter
+ if !maybeOpener && !closer.CanOpen { // no candidate opener and it cannot open either: drop it
+ pc.RemoveDelimiter(closer)
+ }
+ closer = next
+ continue
+ }
+ opener.ConsumeCharacters(consume)
+ closer.ConsumeCharacters(consume)
+
+ node := opener.Processor.OnMatch(consume)
+ node.(interface{ SetPos(int) }).SetPos(opener.Segment.Start) // NOTE(review): unchecked assertion; panics if the node has no SetPos(int)
+
+ parent := opener.Parent()
+ child := opener.NextSibling()
+
+ for child != nil && child != closer { // move every sibling between opener and closer under node
+ next := child.NextSibling()
+ node.AppendChild(node, child)
+ child = next
+ }
+ parent.InsertAfter(parent, opener, node)
+
+ for c := opener.NextDelimiter; c != nil && c != closer; { // drop the delimiters that sat between the matched pair
+ next := c.NextDelimiter
+ pc.RemoveDelimiter(c)
+ c = next
+ }
+
+ if opener.Length == 0 {
+ pc.RemoveDelimiter(opener)
+ }
+
+ if closer.Length == 0 { // otherwise the same closer is tried again with its remaining characters
+ next := closer.NextDelimiter
+ pc.RemoveDelimiter(closer)
+ closer = next
+ }
+ }
+ pc.ClearDelimiters(bottom)
+}
diff --git a/pkg/goldmark/parser/direct_predicates_test.go b/pkg/goldmark/parser/direct_predicates_test.go
new file mode 100644
index 000000000..2fba485c3
--- /dev/null
+++ b/pkg/goldmark/parser/direct_predicates_test.go
@@ -0,0 +1,38 @@
+package parser_test
+
+// Direct-call coverage for the predicate methods (Close,
+// CanInterruptParagraph, CanAcceptIndentedLine) on parsers
+// where the dispatcher doesn't always invoke them. These are
+// constant-return methods that exist only to satisfy the
+// BlockParser interface, so the tests call them directly.
+
+import (
+ "testing"
+
+ "github.com/yuin/goldmark/parser"
+)
+
+// TestFencedCodeBlockParser_Predicates calls both predicates once; their results are not checked.
+func TestFencedCodeBlockParser_Predicates(t *testing.T) {
+	fenced := parser.NewFencedCodeBlockParser()
+	_, _ = fenced.CanInterruptParagraph(), fenced.CanAcceptIndentedLine()
+}
+
+// TestHTMLBlockParser_Predicates calls both predicates and Close once; results are not checked.
+func TestHTMLBlockParser_Predicates(t *testing.T) {
+	htmlBlock := parser.NewHTMLBlockParser()
+	_, _ = htmlBlock.CanInterruptParagraph(), htmlBlock.CanAcceptIndentedLine()
+	htmlBlock.Close(nil, nil, parser.NewContext())
+}
+
+func TestListItemParser_Predicates(t *testing.T) {
+	item, pc := parser.NewListItemParser(), parser.NewContext()
+	_ = item.CanAcceptIndentedLine() // result intentionally unused; the call itself is the point
+	item.Close(nil, nil, pc)
+}
+
+func TestThematicBreakParser_Predicates(t *testing.T) {
+	rule, pc := parser.NewThematicBreakParser(), parser.NewContext()
+	_ = rule.CanAcceptIndentedLine() // result intentionally unused; the call itself is the point
+	rule.Close(nil, nil, pc)
+}
diff --git a/pkg/goldmark/parser/edge_cases_test.go b/pkg/goldmark/parser/edge_cases_test.go
new file mode 100644
index 000000000..45a69f87d
--- /dev/null
+++ b/pkg/goldmark/parser/edge_cases_test.go
@@ -0,0 +1,364 @@
+package parser_test
+
+// Edge-case corpus targeting the remaining gaps in raw_html.go
+// (parseComment, parseUntil), setext_headings.go (Continue, Close),
+// code_span.go (Parse), and attribute.go (parseAttributeNumber).
+
+import (
+ "testing"
+
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/text"
+)
+
+func TestFencedCodeBlock_IndentationBranches(t *testing.T) {
+ // A fenced code block opened with N leading spaces dedents
+ // each body line by up to N. Drive the "less indented than
+ // expected" branch with body lines that have fewer leading
+ // spaces than the opener.
+ cases := []string{
+ " ```\nbody\n ```\n", // 3-space opener, no body indent
+ " ```\n body\n ```\n", // 3-space opener, 1-space body
+ "```\nfirst\n\n blank then content\n```\n", // blank line inside fence
+ "~~~\nfirst\n~~~~\nnot a closer with diff char\nstill inside ~~~\n", // tilde fence; NOTE(review): per CommonMark "~~~~" is long enough to close "~~~" — confirm intent
+ }
+ for _, src := range cases {
+ _ = parseWithDefaults(src) // result discarded; nothing is asserted about the parsed output
+ }
+}
+
+func TestRawHTML_Comment_AllShapes(t *testing.T) {
+ // CommonMark inline comment rules: . Drive each
+ // branch in parseComment by varying the content.
+ cases := []string{
+ "a b\n",
+ "a b\n", // empty comment 1 ( b\n", // empty comment 2 ()
+ "a b\n",
+ "a b\n",
+ "a \n"
+ blocks := walkHTMLBlocks(src)
+ if len(blocks) != 1 {
+ t.Errorf("expected one HTMLBlock for multi-line comment, got %d", len(blocks))
+ }
+}
+
+// TestHTMLBlock_Type3_ProcessingInstructionMultiLine expects walkHTMLBlocks to
+// report exactly one block for its input.
+func TestHTMLBlock_Type3_ProcessingInstructionMultiLine(t *testing.T) {
+	if got := len(walkHTMLBlocks("\n")); got != 1 {
+		t.Errorf("expected one HTMLBlock for multi-line PI, got %d", got)
+	}
+}
+
+// TestHTMLBlock_Type4_DeclarationMultiLine expects walkHTMLBlocks to report
+// exactly one block for its input.
+func TestHTMLBlock_Type4_DeclarationMultiLine(t *testing.T) {
+	if got := len(walkHTMLBlocks("\n")); got != 1 {
+		t.Errorf("expected one HTMLBlock for multi-line declaration, got %d", got)
+	}
+}
+
+// TestHTMLBlock_Type5_CDATAMultiLine expects walkHTMLBlocks to report exactly
+// one block for its input.
+func TestHTMLBlock_Type5_CDATAMultiLine(t *testing.T) {
+	if got := len(walkHTMLBlocks("\n")); got != 1 {
+		t.Errorf("expected one HTMLBlock for multi-line CDATA, got %d", got)
+	}
+}
+
+func TestHTMLBlock_Type6_BlockTagClosesOnBlankLine(t *testing.T) {
+ src := "