From 769f9ca70c0a531394b440e29a8f3019cd783270 Mon Sep 17 00:00:00 2001 From: Michael Whapples Date: Wed, 3 Jun 2026 12:07:18 +0100 Subject: [PATCH] Handle definition lists. --- .../ebraille/bbx2html/blocks.kt | 40 ++++++-- .../ebraille/bbx2html/containers.kt | 97 ++++++++++++------- 2 files changed, 94 insertions(+), 43 deletions(-) diff --git a/brailleblaster-ebraille/src/main/kotlin/org/brailleblaster/ebraille/bbx2html/blocks.kt b/brailleblaster-ebraille/src/main/kotlin/org/brailleblaster/ebraille/bbx2html/blocks.kt index 7cb6d33c..abc73188 100644 --- a/brailleblaster-ebraille/src/main/kotlin/org/brailleblaster/ebraille/bbx2html/blocks.kt +++ b/brailleblaster-ebraille/src/main/kotlin/org/brailleblaster/ebraille/bbx2html/blocks.kt @@ -21,7 +21,7 @@ import org.brailleblaster.ebraille.asciiToEbraille import org.brailleblaster.utils.xml.UTD_NS import org.brailleblaster.utils.xom.childNodes -internal fun Element.processBlock(): Collection = when(BBX.BLOCK.getSubType(this)) { +internal fun Element.processBlock(): Collection = when (BBX.BLOCK.getSubType(this)) { BBX.BLOCK.STYLE -> processStyle() BBX.BLOCK.LIST_ITEM -> listOf(processParagraph(tag = "li")) BBX.BLOCK.PAGE_NUM -> listOf(processPageNum()) @@ -29,11 +29,12 @@ internal fun Element.processBlock(): Collection = when( else -> listOf(processParagraph()) } -internal fun Element.processPageNum(): org.jsoup.nodes.Element = org.jsoup.nodes.Element("span").attr("role", "doc-pagebreak").apply { - val brl = getFirstChildElement("brl", UTD_NS) - attr("aria-label", brl.getAttributeValue("printPage").orEmpty().ifEmpty { "-" }) - appendText(asciiToEbraille(brl.getAttributeValue("printPageBrl").orEmpty().ifEmpty { "\u2824" })) -} +internal fun Element.processPageNum(): org.jsoup.nodes.Element = + org.jsoup.nodes.Element("span").attr("role", "doc-pagebreak").apply { + val brl = getFirstChildElement("brl", UTD_NS) + attr("aria-label", brl.getAttributeValue("printPage").orEmpty().ifEmpty { "-" }) + appendText(asciiToEbraille(brl.getAttributeValue("printPageBrl").orEmpty().ifEmpty { "\u2824" })) + } private fun Element.processStyle(): Collection = when (style) { "Centered Heading" -> listOf(processParagraph(tag = "h1")) @@ -51,4 +52,29 @@ private fun Element.processParagraph( for ((k, v) in attributes) { attr(k, v) } -}.appendChildren(childNodes.flatMap { it.processContent() }) \ No newline at end of file +}.appendChildren(childNodes.flatMap { it.processContent() }) + +private sealed interface DefinitionListItem { + data class Term(val element: Element) : DefinitionListItem + data class Definition(val elements: List) : DefinitionListItem +} + +internal fun Element.processDefinitionListItem(): List = + childElements.fold(listOf()) { acc, element -> + if (BBX.SPAN.DEFINITION_TERM.isA(element)) { + acc + DefinitionListItem.Term(element) + } else { + val prev = acc.lastOrNull() + if (prev is DefinitionListItem.Definition) { + acc.dropLast(1) + DefinitionListItem.Definition(prev.elements + element) + } else { + acc + DefinitionListItem.Definition(listOf(element)) + } + } + }.map { + when (it) { + is DefinitionListItem.Term -> it.element.processParagraph(tag = "dt") + is DefinitionListItem.Definition -> org.jsoup.nodes.Element("dd") + .appendChildren(it.elements.flatMap { e -> e.processContent() }) + } + } \ No newline at end of file diff --git a/brailleblaster-ebraille/src/main/kotlin/org/brailleblaster/ebraille/bbx2html/containers.kt b/brailleblaster-ebraille/src/main/kotlin/org/brailleblaster/ebraille/bbx2html/containers.kt index c3589724..1146758d 100644 --- a/brailleblaster-ebraille/src/main/kotlin/org/brailleblaster/ebraille/bbx2html/containers.kt +++ b/brailleblaster-ebraille/src/main/kotlin/org/brailleblaster/ebraille/bbx2html/containers.kt @@ -42,43 +42,65 @@ private fun Element.processBox(): org.jsoup.nodes.Element { private fun Element.processList(): org.jsoup.nodes.Element = childElements.filter { BBX.BLOCK.LIST_ITEM.isA(it) } - .map { ListItem(it, it.getAttributeValue("itemLevel", BB_NS)?.toIntOrNull() ?: 0) }.toHtml( - level = 0, - containerFactory = { org.jsoup.nodes.Element("ul").attr("style", "list-style-type: none") } - ) { it.element.processBlock() } - -private fun Element.processTable(): List = if (getAttributeValue("tableCopy", UTD_NS) == "true") { - listOf() -} else { - val tableFormat = getAttributeValue("format") - listOf(org.jsoup.nodes.Element("table").also { - if (tableFormat in listOf("listed", "stairstep", "linear")) { - it.attr("class", tableFormat) - } - }.appendChildren( - childElements.filter { BBX.CONTAINER.TABLE_ROW.isA(it) }.take(1).let { firstRow -> - when (tableFormat) { - "simple" -> { - firstRow.processTableRows(if (getAttributeValue("columnHeading") == "false") "td" else "th") - } - "listed" -> { - firstRow.processTableRows("th") - } - "stairstep" -> { - headerRowFromStairStepTableTN(this) + (firstRow.processTableRows()) + .map { ListItem(it, it.getAttributeValue("itemLevel", BB_NS)?.toIntOrNull() ?: 0) }.let { items -> + when (BBX.CONTAINER.LIST.ATTRIB_LIST_TYPE.get(this)) { + BBX.ListType.DEFINITION -> items.toHtml( + level = 0, + containerFactory = { org.jsoup.nodes.Element("dl") } + ) { + it.element.processDefinitionListItem() } - "linear" -> { - headerRowFromLinearTableTN(this) + (firstRow.processTableRows()) - } - else -> { - firstRow.processTableRows() + else -> items.toHtml( + level = 0, + containerFactory = { org.jsoup.nodes.Element("ul").attr("style", "list-style-type: none") } + ) { + it.element.processBlock() } } - } + (childElements.filter { BBX.CONTAINER.TABLE_ROW.isA(it) }.drop(1).processTableRows()) - )) -} + } + +private fun Element.processTable(): List = + if (getAttributeValue("tableCopy", UTD_NS) == "true") { + listOf() + } else { + val tableFormat = getAttributeValue("format") + listOf( + org.jsoup.nodes.Element("table").also { + if (tableFormat in listOf("listed", "stairstep", "linear")) { + it.attr("class", tableFormat) + } + }.appendChildren( + childElements.filter { BBX.CONTAINER.TABLE_ROW.isA(it) }.take(1).let { firstRow -> + when (tableFormat) { + "simple" -> { + firstRow.processTableRows(if (getAttributeValue("columnHeading") == "false") "td" else "th") + } + + "listed" -> { + firstRow.processTableRows("th") + } + + "stairstep" -> { + headerRowFromStairStepTableTN(this) + (firstRow.processTableRows()) + } + + "linear" -> { + headerRowFromLinearTableTN(this) + (firstRow.processTableRows()) + } -private fun Iterable.processTableRows(cellTag: String = "td"): List = map { r -> org.jsoup.nodes.Element("tr").appendChildren(r.childElements.filter { BBX.BLOCK.TABLE_CELL.isA(it) }.map { c -> org.jsoup.nodes.Element(cellTag).appendChildren(c.processContent())}) } + else -> { + firstRow.processTableRows() + } + } + } + (childElements.filter { BBX.CONTAINER.TABLE_ROW.isA(it) }.drop(1).processTableRows()) + ) + ) + } + +private fun Iterable.processTableRows(cellTag: String = "td"): List = map { r -> + org.jsoup.nodes.Element("tr").appendChildren(r.childElements.filter { BBX.BLOCK.TABLE_CELL.isA(it) } + .map { c -> org.jsoup.nodes.Element(cellTag).appendChildren(c.processContent()) }) +} private fun headerRowFromStairStepTableTN(table: Element): List = (table.previousSibling { it is Element } as? Element)?.let { e -> @@ -86,7 +108,7 @@ private fun headerRowFromStairStepTableTN(table: Element): List b.childElements.filter { BBX.SPAN.OTHER.isA(it) }.map { org.jsoup.nodes.Element("th").appendChildren(it.processContent()) } } - ) + e.childElements.filter { BBX.BLOCK.isA(it) }.takeLast(1).flatMap { b -> + b.childElements.filter { BBX.SPAN.OTHER.isA(it) } + .map { org.jsoup.nodes.Element("th").appendChildren(it.processContent()) } + } + ) ) } else { listOf()