From 83d1d600e7cab1741676f0f51baa0325599ccdc2 Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Fri, 12 Jun 2026 00:39:31 +0100 Subject: [PATCH] fix(docx): list export resolves markers through the shared PDF rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The semantic Word backend fell back to the flat-list marker for nested items (PDF uses the depth cascade) and skipped flat-item normalization (author-typed '- item' doubled as '• - item'; blank items emitted marker-only paragraphs). Marker knowledge now has one owner in the node package - ListMarker.defaultForDepth(int) and ListMarker.normalizeItemText(String, boolean) (@since 1.8.0) - and TextFlowSupport and DocxSemanticBackend both delegate to it. Two DocxSemanticBackendTest expectations updated from the old divergent behavior to the parity contract; 6 new DocxListParityTest cases. Full verify: BUILD SUCCESS. --- CHANGELOG.md | 10 ++ .../backend/semantic/DocxSemanticBackend.java | 24 +++- .../document/layout/TextFlowSupport.java | 45 +------- .../compose/document/node/ListMarker.java | 64 ++++++++++ .../backend/semantic/DocxListParityTest.java | 109 ++++++++++++++++++ .../semantic/DocxSemanticBackendTest.java | 16 +-- 6 files changed, 213 insertions(+), 55 deletions(-) create mode 100644 src/test/java/com/demcha/compose/document/backend/semantic/DocxListParityTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 81cb6794a..729ab7bb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -140,6 +140,16 @@ Entries land here as they merge. trailing space — with another literal space, so every exported item read `"• text"`, and markerless lists gained a stray leading space. The export now uses `ListMarker.prefix()`, matching the fixed-layout text pipeline. +- **DOCX list export fully matches the PDF list pipeline.** The semantic Word + backend resolved nested-item marker fallbacks against the flat-list marker + and skipped flat-item normalization, so the two outputs of one session + disagreed: a nested item without an explicit marker exported as the list + bullet where the PDF renders the depth cascade (`•` → `◦` → `▪` → `·`), + an author-typed `"- item"` doubled up as `"• - item"`, and blank items + produced marker-only paragraphs. Both rules now live in one shared place — + `ListMarker.defaultForDepth(int)` and + `ListMarker.normalizeItemText(String, boolean)` (`@since 1.8.0`) — and the + fixed-layout pipeline and the DOCX export both call them. ### Documentation diff --git a/src/main/java/com/demcha/compose/document/backend/semantic/DocxSemanticBackend.java b/src/main/java/com/demcha/compose/document/backend/semantic/DocxSemanticBackend.java index 64610103f..310b0743b 100644 --- a/src/main/java/com/demcha/compose/document/backend/semantic/DocxSemanticBackend.java +++ b/src/main/java/com/demcha/compose/document/backend/semantic/DocxSemanticBackend.java @@ -159,14 +159,24 @@ private void writeNode(XWPFDocument document, DocumentNode node) throws Exceptio /** * Semantic list mapping: each item becomes a marker-prefixed paragraph in - * the list's text style; nested items indent two spaces per depth and use - * their own marker when one is set. + * the list's text style. Flat items run through the same + * {@code ListMarker.normalizeItemText} step as fixed-layout rendering + * (author-typed markers stripped, blank items skipped); nested items + * indent two spaces per depth and use their own marker when one is set, + * falling back to {@code ListMarker.defaultForDepth} otherwise. */ private void writeList(XWPFDocument document, com.demcha.compose.document.node.ListNode list) { for (String item : list.items()) { + // Same normalization as the fixed-layout pipeline: strip an + // author-typed leading marker and skip items with no content. + String normalized = com.demcha.compose.document.node.ListMarker + .normalizeItemText(item, list.normalizeMarkers()); + if (normalized.isBlank()) { + continue; + } writeListLine(document, list.textStyle(), - list.marker().prefix() + item, 0); + list.marker().prefix() + normalized, 0); } for (com.demcha.compose.document.node.ListItem item : list.nestedItems()) { writeNestedItem(document, list, item, 0); @@ -178,9 +188,13 @@ private void writeNestedItem(XWPFDocument document, com.demcha.compose.document.node.ListItem item, int depth) { // prefix() carries its own trailing space (and is empty for - // markerless lists), matching the fixed-layout text pipeline. + // markerless lists). Items without an explicit (or markerFor-baked) + // marker fall back to the same depth cascade the fixed-layout + // pipeline uses — never to the flat-list marker. com.demcha.compose.document.node.ListMarker marker = - item.marker() != null ? item.marker() : list.marker(); + item.marker() != null + ? item.marker() + : com.demcha.compose.document.node.ListMarker.defaultForDepth(depth); writeListLine(document, list.textStyle(), marker.prefix() + item.label(), depth); for (com.demcha.compose.document.node.ListItem child : item.children()) { writeNestedItem(document, list, child, depth + 1); diff --git a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java index 8ec5aa7a5..c3f084248 100644 --- a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java +++ b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java @@ -195,7 +195,7 @@ private static ListNode flattenNestedListNode(ListNode node) { private static void flattenNestedItems(List items, int depth, List output) { for (ListItem item : items) { - ListMarker marker = item.marker() != null ? item.marker() : defaultMarkerForDepth(depth); + ListMarker marker = item.marker() != null ? item.marker() : ListMarker.defaultForDepth(depth); StringBuilder prefix = new StringBuilder(NESTED_LIST_INDENT_UNIT.repeat(depth)); if (marker.isVisible()) { // ListMarker.normalize already appends a trailing space @@ -210,20 +210,6 @@ private static void flattenNestedItems(List items, int depth, List ListMarker.bullet(); // • - case 1 -> new ListMarker("◦"); // ◦ - case 2 -> new ListMarker("▪"); // ▪ - default -> new ListMarker("·"); // · - }; - } - /** * Splits a prepared list at whole-item boundaries, falling back to * splitting the first item's lines when no whole item fits. @@ -350,7 +336,7 @@ private static PreparedListLayout prepareListLayout(ListNode node, boolean markdownEnabled) { List items = new ArrayList<>(); for (String item : node.items()) { - String normalizedItem = normalizeListItem(item, node.normalizeMarkers()); + String normalizedItem = ListMarker.normalizeItemText(item, node.normalizeMarkers()); if (normalizedItem.isBlank()) { continue; } @@ -517,33 +503,6 @@ private static double listItemsHeight(List items, double return total; } - private static String normalizeListItem(String value, boolean normalizeMarkers) { - String safe = value == null ? "" : value; - if (!normalizeMarkers) { - // Preserve raw whitespace and any author-supplied marker - // characters. Used by the nested-list flatten path so the - // depth-based indent prefix survives layout. - return safe; - } - String normalized = safe.trim(); - if (normalized.isEmpty()) { - return normalized; - } - if (normalized.startsWith("•")) { - return normalized.substring(1).trim(); - } - if (normalized.startsWith("- ")) { - return normalized.substring(2).trim(); - } - if (normalized.startsWith("+ ")) { - return normalized.substring(2).trim(); - } - if (normalized.startsWith("* ") && !normalized.startsWith("**")) { - return normalized.substring(2).trim(); - } - return normalized; - } - // ------------------------------------------------------------------ // Paragraph layout core // ------------------------------------------------------------------ diff --git a/src/main/java/com/demcha/compose/document/node/ListMarker.java b/src/main/java/com/demcha/compose/document/node/ListMarker.java index 2b01286d3..fa19caa25 100644 --- a/src/main/java/com/demcha/compose/document/node/ListMarker.java +++ b/src/main/java/com/demcha/compose/document/node/ListMarker.java @@ -56,6 +56,70 @@ public static ListMarker custom(String marker) { return new ListMarker(marker); } + /** + * Returns the built-in marker for a nested-list depth, used when neither + * the item itself nor {@code ListBuilder.markerFor(depth, ...)} supplied + * one: {@code •} at depth 0, {@code ◦} at depth 1, {@code ▪} at depth 2, + * and {@code ·} below that. Fixed-layout rendering and the semantic DOCX + * export both resolve their fallback through this single cascade so the + * two outputs of one session always agree. + * + * @param depth zero-based nesting depth + * @return default marker for the depth + * @since 1.8.0 + */ + public static ListMarker defaultForDepth(int depth) { + return switch (depth) { + case 0 -> bullet(); // • + case 1 -> new ListMarker("◦"); // ◦ + case 2 -> new ListMarker("▪"); // ▪ + default -> new ListMarker("·"); // · + }; + } + + /** + * Normalizes an author-supplied flat list item before a marker prefix is + * applied: trims the text and strips one leading author-typed marker + * ({@code •}, {@code "- "}, {@code "+ "}, or {@code "* "} — but not a + * {@code **bold} run) so the typed marker does not double up with the + * rendered one. When {@code normalizeMarkers} is {@code false} the value + * is returned unchanged apart from null-safety. A blank result means the + * item carries no renderable content and should be skipped, matching + * fixed-layout rendering. + * + * @param value raw author-supplied item text; {@code null} is + * treated as empty + * @param normalizeMarkers whether author-typed markers are stripped + * @return normalized item text, possibly blank + * @since 1.8.0 + */ + public static String normalizeItemText(String value, boolean normalizeMarkers) { + String safe = value == null ? "" : value; + if (!normalizeMarkers) { + // Preserve raw whitespace and any author-supplied marker + // characters. Used by the nested-list flatten path so the + // depth-based indent prefix survives layout. + return safe; + } + String normalized = safe.trim(); + if (normalized.isEmpty()) { + return normalized; + } + if (normalized.startsWith("•")) { + return normalized.substring(1).trim(); + } + if (normalized.startsWith("- ")) { + return normalized.substring(2).trim(); + } + if (normalized.startsWith("+ ")) { + return normalized.substring(2).trim(); + } + if (normalized.startsWith("* ") && !normalized.startsWith("**")) { + return normalized.substring(2).trim(); + } + return normalized; + } + /** * Returns {@code true} when this marker has non-whitespace content. * diff --git a/src/test/java/com/demcha/compose/document/backend/semantic/DocxListParityTest.java b/src/test/java/com/demcha/compose/document/backend/semantic/DocxListParityTest.java new file mode 100644 index 000000000..d417955c6 --- /dev/null +++ b/src/test/java/com/demcha/compose/document/backend/semantic/DocxListParityTest.java @@ -0,0 +1,109 @@ +package com.demcha.compose.document.backend.semantic; + +import com.demcha.compose.GraphCompose; +import com.demcha.compose.document.api.DocumentSession; +import com.demcha.compose.document.node.ListMarker; +import com.demcha.compose.document.style.DocumentInsets; +import org.apache.poi.xwpf.usermodel.XWPFDocument; +import org.apache.poi.xwpf.usermodel.XWPFParagraph; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledIfSystemProperty; + +import java.io.ByteArrayInputStream; +import java.util.List; +import java.util.function.Consumer; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * PDF ↔ DOCX list parity: the semantic Word export must resolve markers and + * item text through the same shared rules as fixed-layout rendering — the + * {@link ListMarker#defaultForDepth(int)} cascade for nested fallbacks and + * {@link ListMarker#normalizeItemText(String, boolean)} for flat items — + * so both outputs of one session agree. + */ +@DisabledIfSystemProperty(named = "no.poi", matches = "true", + disabledReason = "DocxSemanticBackend requires poi-ooxml; the no-poi profile validates the rest of the suite without it") +class DocxListParityTest { + + @Test + void nestedFallbackFollowsTheDepthCascade() throws Exception { + List texts = exportTexts(flow -> flow + .addList(list -> list + .name("Outline") + .addItem("alpha", l1 -> l1 + .addItem("beta", l2 -> l2 + .addItem("gamma"))))); + + assertThat(texts).contains("• alpha", " ◦ beta", " ▪ gamma"); + } + + @Test + void explicitMarkersStillBeatTheCascade() throws Exception { + List texts = exportTexts(flow -> flow + .addList(list -> list + .name("Outline") + .markerFor(1, ListMarker.custom("→")) + .addItem("alpha", l1 -> l1.addItem("beta")))); + + assertThat(texts).contains(" → beta"); + assertThat(texts).doesNotContain(" ◦ beta"); + } + + @Test + void flatItemsStripAuthorTypedMarkers() throws Exception { + List texts = exportTexts(flow -> flow + .addList("- dashed", "• bulleted", "* starred", "+ plussed")); + + assertThat(texts).contains("• dashed", "• bulleted", "• starred", "• plussed"); + assertThat(texts).noneMatch(t -> t.startsWith("• - ") || t.startsWith("• • ")); + } + + @Test + void boldLeadIsNotMistakenForAMarker() throws Exception { + List texts = exportTexts(flow -> flow + .addList("**bold** lead stays intact")); + + assertThat(texts).contains("• **bold** lead stays intact"); + } + + @Test + void blankFlatItemsAreDropped() throws Exception { + List texts = exportTexts(flow -> flow + .addList("kept", "", " ")); + + assertThat(texts).contains("• kept"); + // No marker-only paragraphs for the blank items. + assertThat(texts).noneMatch(t -> t.trim().equals("•")); + } + + @Test + void normalizeMarkersFalsePreservesRawItems() throws Exception { + List texts = exportTexts(flow -> flow + .addList(list -> list + .name("Raw") + .normalizeMarkers(false) + .items("- raw dash survives"))); + + assertThat(texts).contains("• - raw dash survives"); + } + + private static List exportTexts( + Consumer author) throws Exception { + byte[] docxBytes; + try (DocumentSession session = GraphCompose.document() + .pageSize(595, 842) + .margin(DocumentInsets.of(36)) + .create()) { + var flow = session.dsl().pageFlow().name("Flow"); + author.accept(flow); + flow.build(); + docxBytes = session.export(new DocxSemanticBackend()); + } + try (XWPFDocument document = new XWPFDocument(new ByteArrayInputStream(docxBytes))) { + return document.getParagraphs().stream() + .map(XWPFParagraph::getText) + .toList(); + } + } +} diff --git a/src/test/java/com/demcha/compose/document/backend/semantic/DocxSemanticBackendTest.java b/src/test/java/com/demcha/compose/document/backend/semantic/DocxSemanticBackendTest.java index a951fe508..7491effc6 100644 --- a/src/test/java/com/demcha/compose/document/backend/semantic/DocxSemanticBackendTest.java +++ b/src/test/java/com/demcha/compose/document/backend/semantic/DocxSemanticBackendTest.java @@ -111,12 +111,12 @@ void nestedListItemsIndentTwoSpacesPerDepth() throws Exception { List texts = document.getParagraphs().stream() .map(XWPFParagraph::getText).toList(); // Two spaces of indent per depth; without per-item markers the - // semantic export falls back to the list's top-level bullet at - // every level (the visual depth cascade is a layout-pass concern). + // semantic export falls back to the same depth cascade the + // fixed-layout pipeline uses (• ◦ ▪), so PDF and DOCX agree. assertThat(texts).contains( "• Level zero", - " • Level one", - " • Level two"); + " ◦ Level one", + " ▪ Level two"); } } @@ -139,10 +139,12 @@ void nestedListItemsKeepTheirCustomMarkers() throws Exception { try (XWPFDocument document = new XWPFDocument(new ByteArrayInputStream(docxBytes))) { List texts = document.getParagraphs().stream() .map(XWPFParagraph::getText).toList(); - // The top-level custom marker and the per-depth override both - // survive the export. + // The per-depth override survives; the flat-list marker("→") does + // not leak into nested fallbacks — depth 0 takes the cascade + // bullet exactly as fixed-layout rendering does (markerFor(0, ...) + // is the way to control depth 0). assertThat(texts).contains( - "→ Root", + "• Root", " ‣ Child"); } }