From e3a3795b98d4df4db64ad913d65e591fd0118cb7 Mon Sep 17 00:00:00 2001 From: Ken Perry Date: Thu, 28 May 2026 16:12:28 -0400 Subject: [PATCH] fix(docx-import): map List Paragraph style to BBX list type Added a Pandoc Lua pre-filter to convert custom-style List Paragraph/Paragraph List blocks into real list blocks before BBX conversion. Sorry, but I had to add another Lua file. If I did not, it would not catch the list before it was removed from the paragraphs, and it only needed to work with DOCX. This might be another good reason to try to go all native. I will start looking into that. This should work for now, though. This bug came from my PNC files. It is rare, but now that I have this, I have one more thing to do before I can replace Duxbury in the PNC code. Since this was only for my special List Paragraph style, I made the DOCX import execution apply the new filter only for DOCX inputs. Preserved BBX output compatibility by keeping list conversion in the existing Pandoc/BBX pipeline. Updated the tests so they support the extra Lua file. 
--- .../main/dist/programData/pandoc/lua/bbx.lua | 36 +++++++++++ .../pandoc/lua/list-paragraph-style.lua | 55 +++++++++++++++++ .../archiver2/PandocArchiverLoader.kt | 24 ++++++-- .../pandoc/PandocLuaDefinitionListTest.kt | 61 +++++++++++++++++-- 4 files changed, 165 insertions(+), 11 deletions(-) create mode 100644 brailleblaster-core/src/main/dist/programData/pandoc/lua/list-paragraph-style.lua diff --git a/brailleblaster-core/src/main/dist/programData/pandoc/lua/bbx.lua b/brailleblaster-core/src/main/dist/programData/pandoc/lua/bbx.lua index 67d44500..c5bc3f3b 100644 --- a/brailleblaster-core/src/main/dist/programData/pandoc/lua/bbx.lua +++ b/brailleblaster-core/src/main/dist/programData/pandoc/lua/bbx.lua @@ -658,7 +658,43 @@ function RawBlock(format, str) return t end + local function getCustomStyle(attr) + if attr == nil or type(attr) ~= "table" then + return nil + end + + if attr.attributes ~= nil and type(attr.attributes) == "table" then + return attr.attributes["custom-style"] + end + + return attr["custom-style"] + end + + local function isListParagraphStyle(customStyle) + if customStyle == nil then + return false + end + + local normalized = string.lower(customStyle) + return normalized == "list paragraph" or normalized == "paragraph list" + end + function Div(s, attr) + local customStyle = getCustomStyle(attr) + if isListParagraphStyle(customStyle) then + local item = removeTags(s) + if item == nil then + item = '' + end + if string.len(string.gsub(item, '%s+', '')) == 0 then + return '' + end + + return '' + .. '' .. item .. '' + .. 
'' + end + return s end diff --git a/brailleblaster-core/src/main/dist/programData/pandoc/lua/list-paragraph-style.lua b/brailleblaster-core/src/main/dist/programData/pandoc/lua/list-paragraph-style.lua new file mode 100644 index 00000000..b08f2813 --- /dev/null +++ b/brailleblaster-core/src/main/dist/programData/pandoc/lua/list-paragraph-style.lua @@ -0,0 +1,55 @@ +-- Convert DOCX paragraphs with custom-style "List Paragraph" into real Pandoc bullet lists +-- before bbx.lua runs, so they import as BBX list types. + +local function customStyle(attr) + if attr == nil then + return nil + end + + local attrs = attr.attributes + if attrs ~= nil then + return attrs["custom-style"] + end + + return nil +end + +local function isListParagraphDiv(block) + if block == nil or block.t ~= "Div" then + return false + end + + local style = customStyle(block.attr) + if style == nil then + return false + end + + local normalized = string.lower(style) + return normalized == "list paragraph" or normalized == "paragraph list" +end + +function Pandoc(doc) + local out = {} + local i = 1 + + while i <= #doc.blocks do + local block = doc.blocks[i] + + if isListParagraphDiv(block) then + local items = {} + + while i <= #doc.blocks and isListParagraphDiv(doc.blocks[i]) do + local div = doc.blocks[i] + items[#items + 1] = div.content + i = i + 1 + end + + out[#out + 1] = pandoc.BulletList(items) + else + out[#out + 1] = block + i = i + 1 + end + end + + return pandoc.Pandoc(out, doc.meta) +end diff --git a/brailleblaster-core/src/main/java/org/brailleblaster/archiver2/PandocArchiverLoader.kt b/brailleblaster-core/src/main/java/org/brailleblaster/archiver2/PandocArchiverLoader.kt index 05dd2de2..f9fa4e1e 100644 --- a/brailleblaster-core/src/main/java/org/brailleblaster/archiver2/PandocArchiverLoader.kt +++ b/brailleblaster-core/src/main/java/org/brailleblaster/archiver2/PandocArchiverLoader.kt @@ -96,12 +96,26 @@ object PandocArchiverLoader : ArchiverFactory.FileLoader { val bbFile = 
File.createTempFile(newFilename, ".bbx") bbFile.deleteOnExit() newFilename = bbFile.absolutePath - val pb = ProcessBuilder( - PANDOC_CMD, "--from=$fromFormat", - "--to=bbx.lua", - "--output=" + bbFile.absolutePath, - filename + val command = mutableListOf( + PANDOC_CMD, + "--from=$fromFormat" ) + + // DOCX custom paragraph style "List Paragraph" arrives as Div(custom-style) + // and must be normalized to a real Pandoc list before bbx.lua is applied. + if (fromFormat?.startsWith("docx") == true) { + command.add("--lua-filter=list-paragraph-style.lua") + } + + command.addAll( + listOf( + "--to=bbx.lua", + "--output=" + bbFile.absolutePath, + filename + ) + ) + + val pb = ProcessBuilder(command) .directory(wrkDir) .redirectError(ProcessBuilder.Redirect.INHERIT) .redirectErrorStream(true) diff --git a/brailleblaster-core/src/test/java/org/brailleblaster/pandoc/PandocLuaDefinitionListTest.kt b/brailleblaster-core/src/test/java/org/brailleblaster/pandoc/PandocLuaDefinitionListTest.kt index 3d510be2..787aa0cb 100644 --- a/brailleblaster-core/src/test/java/org/brailleblaster/pandoc/PandocLuaDefinitionListTest.kt +++ b/brailleblaster-core/src/test/java/org/brailleblaster/pandoc/PandocLuaDefinitionListTest.kt @@ -51,6 +51,11 @@ class PandocLuaDefinitionListTest { */ @Throws(Exception::class) private fun runPandoc(html: String): Document { + return runPandoc(html, emptyList()) + } + + @Throws(Exception::class) + private fun runPandoc(html: String, luaFilters: List): Document { val htmlFile = File.createTempFile("bb-deflist-test-", ".html") htmlFile.deleteOnExit() FileWriter(htmlFile).use { fw -> @@ -59,13 +64,22 @@ class PandocLuaDefinitionListTest { val bbxFile = File.createTempFile("bb-deflist-out-", ".bbx") bbxFile.deleteOnExit() - val pb = ProcessBuilder( + val command = mutableListOf( PANDOC_CMD, - "--from=html+empty_paragraphs", - "--to=bbx.lua", - "--output=" + bbxFile.absolutePath, - htmlFile.absolutePath - ).directory(File(luaDir)) + "--from=html+empty_paragraphs" + 
) + luaFilters.forEach { filter -> + command.add("--lua-filter=$filter") + } + command.addAll( + listOf( + "--to=bbx.lua", + "--output=" + bbxFile.absolutePath, + htmlFile.absolutePath + ) + ) + + val pb = ProcessBuilder(command).directory(File(luaDir)) pb.environment()["PANDOCCMD"] = PANDOC_CMD pb.redirectErrorStream(true) @@ -91,6 +105,15 @@ class PandocLuaDefinitionListTest { return nodes.get(0) as Element } + /** Returns the first CONTAINER with bb:listType="NORMAL", or fails the test. */ + private fun findFirstNormalList(doc: Document): Element { + val nodes = doc.query( + "//*[local-name()='CONTAINER' and @*[local-name()='listType']='NORMAL']" + ) + Assert.assertNotEquals(nodes.size(), 0, "No NORMAL list container found in BBX output:\n" + doc.toXML()) + return nodes.get(0) as Element + } + /** Asserts that a LIST_ITEM block has the expected term text and definition text. */ private fun assertDefinitionItem(item: Element, expectedTerm: String, expectedDef: String) { Assert.assertEquals(item.localName, "BLOCK") @@ -186,6 +209,32 @@ class PandocLuaDefinitionListTest { ) } + // ------------------------------------------------------------------------- + // DOCX custom-style="List Paragraph" represented as a Div should map to BBX NORMAL list + // ------------------------------------------------------------------------- + @Test + @Throws(Exception::class) + fun customStyleListParagraphMapsToNormalList() { + val html = ("" + + "

Total Loan Amount $33,333.33

" + + "") + + val doc = runPandoc(html, listOf("list-paragraph-style.lua")) + val list = findFirstNormalList(doc) + + Assert.assertEquals(list.getAttributeValue("listLevel", BB_NS), "0") + Assert.assertEquals(list.getChildCount(), 1, "Expected one LIST_ITEM under NORMAL list") + + val item = list.getChild(0) as Element + Assert.assertEquals(item.localName, "BLOCK") + Assert.assertEquals(item.getAttributeValue("type", BB_NS), "LIST_ITEM") + Assert.assertEquals(item.getAttributeValue("itemLevel", BB_NS), "0") + Assert.assertTrue( + item.value.contains("Total Loan Amount $33,333.33"), + "LIST_ITEM should contain the source paragraph text" + ) + } + // ------------------------------------------------------------------------- // Multiple
per
// -------------------------------------------------------------------------