diff --git a/brailleblaster-core/src/main/dist/programData/pandoc/lua/bbx.lua b/brailleblaster-core/src/main/dist/programData/pandoc/lua/bbx.lua index 67d44500..c5bc3f3b 100644 --- a/brailleblaster-core/src/main/dist/programData/pandoc/lua/bbx.lua +++ b/brailleblaster-core/src/main/dist/programData/pandoc/lua/bbx.lua @@ -658,7 +658,43 @@ function RawBlock(format, str) return t end + local function getCustomStyle(attr) + if attr == nil or type(attr) ~= "table" then + return nil + end + + if attr.attributes ~= nil and type(attr.attributes) == "table" then + return attr.attributes["custom-style"] + end + + return attr["custom-style"] + end + + local function isListParagraphStyle(customStyle) + if customStyle == nil then + return false + end + + local normalized = string.lower(customStyle) + return normalized == "list paragraph" or normalized == "paragraph list" + end + function Div(s, attr) + local customStyle = getCustomStyle(attr) + if isListParagraphStyle(customStyle) then + local item = removeTags(s) + if item == nil then + item = '' + end + if string.len(string.gsub(item, '%s+', '')) == 0 then + return '' + end + + return '' + .. '' .. item .. '' + .. '' + end + return s end diff --git a/brailleblaster-core/src/main/dist/programData/pandoc/lua/list-paragraph-style.lua b/brailleblaster-core/src/main/dist/programData/pandoc/lua/list-paragraph-style.lua new file mode 100644 index 00000000..b08f2813 --- /dev/null +++ b/brailleblaster-core/src/main/dist/programData/pandoc/lua/list-paragraph-style.lua @@ -0,0 +1,55 @@ +-- Convert DOCX paragraphs with custom-style "List Paragraph" into real Pandoc bullet lists +-- before bbx.lua runs, so they import as BBX list types. + +local function customStyle(attr) + if attr == nil then + return nil + end + + local attrs = attr.attributes + if attrs ~= nil then + return attrs["custom-style"] + end + + return nil +end + +local function isListParagraphDiv(block) + if block == nil or block.t ~= "Div" then + return false + end + + local style = customStyle(block.attr) + if style == nil then + return false + end + + local normalized = string.lower(style) + return normalized == "list paragraph" or normalized == "paragraph list" +end + +function Pandoc(doc) + local out = {} + local i = 1 + + while i <= #doc.blocks do + local block = doc.blocks[i] + + if isListParagraphDiv(block) then + local items = {} + + while i <= #doc.blocks and isListParagraphDiv(doc.blocks[i]) do + local div = doc.blocks[i] + items[#items + 1] = div.content + i = i + 1 + end + + out[#out + 1] = pandoc.BulletList(items) + else + out[#out + 1] = block + i = i + 1 + end + end + + return pandoc.Pandoc(out, doc.meta) +end diff --git a/brailleblaster-core/src/main/java/org/brailleblaster/archiver2/PandocArchiverLoader.kt b/brailleblaster-core/src/main/java/org/brailleblaster/archiver2/PandocArchiverLoader.kt index 05dd2de2..f9fa4e1e 100644 --- a/brailleblaster-core/src/main/java/org/brailleblaster/archiver2/PandocArchiverLoader.kt +++ b/brailleblaster-core/src/main/java/org/brailleblaster/archiver2/PandocArchiverLoader.kt @@ -96,12 +96,26 @@ object PandocArchiverLoader : ArchiverFactory.FileLoader { val bbFile = File.createTempFile(newFilename, ".bbx") bbFile.deleteOnExit() newFilename = bbFile.absolutePath - val pb = ProcessBuilder( - PANDOC_CMD, "--from=$fromFormat", - "--to=bbx.lua", - "--output=" + bbFile.absolutePath, - filename + val command = mutableListOf( + PANDOC_CMD, + "--from=$fromFormat" ) + + // DOCX custom paragraph style "List Paragraph" arrives as Div(custom-style) + // and must be normalized to a real Pandoc list before bbx.lua is applied. + if (fromFormat?.startsWith("docx") == true) { + command.add("--lua-filter=list-paragraph-style.lua") + } + + command.addAll( + listOf( + "--to=bbx.lua", + "--output=" + bbFile.absolutePath, + filename + ) + ) + + val pb = ProcessBuilder(command) .directory(wrkDir) .redirectError(ProcessBuilder.Redirect.INHERIT) .redirectErrorStream(true) diff --git a/brailleblaster-core/src/test/java/org/brailleblaster/pandoc/PandocLuaDefinitionListTest.kt b/brailleblaster-core/src/test/java/org/brailleblaster/pandoc/PandocLuaDefinitionListTest.kt index 3d510be2..787aa0cb 100644 --- a/brailleblaster-core/src/test/java/org/brailleblaster/pandoc/PandocLuaDefinitionListTest.kt +++ b/brailleblaster-core/src/test/java/org/brailleblaster/pandoc/PandocLuaDefinitionListTest.kt @@ -51,6 +51,11 @@ class PandocLuaDefinitionListTest { */ @Throws(Exception::class) private fun runPandoc(html: String): Document { + return runPandoc(html, emptyList()) + } + + @Throws(Exception::class) + private fun runPandoc(html: String, luaFilters: List): Document { val htmlFile = File.createTempFile("bb-deflist-test-", ".html") htmlFile.deleteOnExit() FileWriter(htmlFile).use { fw -> @@ -59,13 +64,22 @@ class PandocLuaDefinitionListTest { val bbxFile = File.createTempFile("bb-deflist-out-", ".bbx") bbxFile.deleteOnExit() - val pb = ProcessBuilder( + val command = mutableListOf( PANDOC_CMD, - "--from=html+empty_paragraphs", - "--to=bbx.lua", - "--output=" + bbxFile.absolutePath, - htmlFile.absolutePath - ).directory(File(luaDir)) + "--from=html+empty_paragraphs" + ) + luaFilters.forEach { filter -> + command.add("--lua-filter=$filter") + } + command.addAll( + listOf( + "--to=bbx.lua", + "--output=" + bbxFile.absolutePath, + htmlFile.absolutePath + ) + ) + + val pb = ProcessBuilder(command).directory(File(luaDir)) pb.environment()["PANDOCCMD"] = PANDOC_CMD pb.redirectErrorStream(true) @@ -91,6 +105,15 @@ class PandocLuaDefinitionListTest { return nodes.get(0) as Element } + /** Returns the first CONTAINER with bb:listType="NORMAL", or fails the test. */ + private fun findFirstNormalList(doc: Document): Element { + val nodes = doc.query( + "//*[local-name()='CONTAINER' and @*[local-name()='listType']='NORMAL']" + ) + Assert.assertNotEquals(nodes.size(), 0, "No NORMAL list container found in BBX output:\n" + doc.toXML()) + return nodes.get(0) as Element + } + /** Asserts that a LIST_ITEM block has the expected term text and definition text. */ private fun assertDefinitionItem(item: Element, expectedTerm: String, expectedDef: String) { Assert.assertEquals(item.localName, "BLOCK") @@ -186,6 +209,32 @@ class PandocLuaDefinitionListTest { ) } + // ------------------------------------------------------------------------- + // DOCX custom-style="List Paragraph" represented as a Div should map to BBX NORMAL list + // ------------------------------------------------------------------------- + @Test + @Throws(Exception::class) + fun customStyleListParagraphMapsToNormalList() { + val html = ("" + + "

Total Loan Amount $33,333.33

" + + "") + + val doc = runPandoc(html, listOf("list-paragraph-style.lua")) + val list = findFirstNormalList(doc) + + Assert.assertEquals(list.getAttributeValue("listLevel", BB_NS), "0") + Assert.assertEquals(list.getChildCount(), 1, "Expected one LIST_ITEM under NORMAL list") + + val item = list.getChild(0) as Element + Assert.assertEquals(item.localName, "BLOCK") + Assert.assertEquals(item.getAttributeValue("type", BB_NS), "LIST_ITEM") + Assert.assertEquals(item.getAttributeValue("itemLevel", BB_NS), "0") + Assert.assertTrue( + item.value.contains("Total Loan Amount $33,333.33"), + "LIST_ITEM should contain the source paragraph text" + ) + } + // ------------------------------------------------------------------------- // Multiple
per
// -------------------------------------------------------------------------