From 602d1bd05ca3491f1bbf057279a81b2a93eb65a3 Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Tue, 9 Jun 2026 00:18:02 +0100 Subject: [PATCH 1/2] perf(render): emit paragraph font/colour operators only when they change The paragraph render handler wrote a setFont (Tf) and setNonStrokingColor (rg) operator for every text span, even across the spans of a single-style paragraph. Track the last-written (font, size) and colour across the paragraph's q...Q block and re-emit only on a real change, invalidating after inline images/shapes; a multi-span single-style paragraph now carries one Tf + one rg instead of one pair per span. Rendered output is unchanged (the skipped operators were redundant). Guarded by the visual-regression suite plus ParagraphTextStateDedupTest, which asserts a single-style paragraph emits one Tf across many drawn spans and that a multi-style paragraph re-emits on each style change. Finding 5. --- CHANGELOG.md | 11 +++ .../PdfParagraphFragmentRenderHandler.java | 58 ++++++++++- .../pdf/ParagraphTextStateDedupTest.java | 95 +++++++++++++++++++ 3 files changed, 160 insertions(+), 4 deletions(-) create mode 100644 src/test/java/com/demcha/compose/document/backend/fixed/pdf/ParagraphTextStateDedupTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 0938c80bd..7b2d6c10b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,17 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge. token). **Output is byte-identical** — the fit predicate is monotonic, so the search returns the same break index. No public API or behaviour change. +- **Paragraph render writes font and colour operators only when they change.** The + paragraph render handler emitted a `setFont` (`Tf`) and `setNonStrokingColor` + (`rg`) operator for *every* text span, even across the spans of a single-style + paragraph. It now tracks the last-written `(font, size)` and colour across the + paragraph's graphics-state block and re-emits only on a real change (invalidating + after inline images/shapes), so a multi-span single-style paragraph carries one + `Tf` + one `rg` instead of one pair per span — fewer operators for PDFBox to + serialize. **Rendered output is unchanged** (the skipped operators were + redundant); pinned by the visual-regression suite plus a content-stream test + asserting one `Tf` across many drawn spans. No public API or behaviour change. + ### Tests / tooling - **Benchmark regression gate and measurement probe (benchmarks module, not part diff --git a/src/main/java/com/demcha/compose/document/backend/fixed/pdf/handlers/PdfParagraphFragmentRenderHandler.java b/src/main/java/com/demcha/compose/document/backend/fixed/pdf/handlers/PdfParagraphFragmentRenderHandler.java index 4da1b44c7..ff55e8a1f 100644 --- a/src/main/java/com/demcha/compose/document/backend/fixed/pdf/handlers/PdfParagraphFragmentRenderHandler.java +++ b/src/main/java/com/demcha/compose/document/backend/fixed/pdf/handlers/PdfParagraphFragmentRenderHandler.java @@ -17,8 +17,10 @@ import com.demcha.compose.font.FontLibrary; import com.demcha.compose.engine.render.pdf.PdfFont; import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; +import java.awt.Color; import java.io.IOException; import java.util.List; @@ -56,6 +58,11 @@ public void render(PlacedFragment fragment, stream.saveGraphicsState(); try { + // Font and non-stroking colour persist across BT/ET within this one + // q...Q block, so track the last-written pair and re-emit Tf/rg only + // when a span actually changes them — a single-style paragraph then + // emits one setFont + one setNonStrokingColor instead of one per span. + TextRenderState textState = new TextRenderState(); double cursorTop = contentTop; for (int lineIndex = 0; lineIndex < payload.lines().size(); lineIndex++) { ParagraphLine line = payload.lines().get(lineIndex); @@ -71,7 +78,7 @@ public void render(PlacedFragment fragment, case LEFT -> innerX; }; - renderLine(stream, fonts, line, lineX, baselineY, environment); + renderLine(stream, fonts, line, lineX, baselineY, environment, textState); cursorTop = lineTop - resolvedLineHeight - payload.lineGap(); } @@ -127,7 +134,8 @@ private void renderLine(PDPageContentStream stream, ParagraphLine line, double lineX, double baselineY, - PdfRenderEnvironment environment) throws IOException { + PdfRenderEnvironment environment, + TextRenderState textState) throws IOException { List spans = line.spans(); if (spans.isEmpty()) { return; @@ -155,8 +163,10 @@ private void renderLine(PDPageContentStream stream, stream.newLineAtOffset((float) cursorX, (float) baselineY); inTextBlock = true; } - stream.setFont(font.fontType(textSpan.textStyle().decoration()), (float) textSpan.textStyle().size()); - stream.setNonStrokingColor(textSpan.textStyle().color()); + textState.applyFont(stream, + font.fontType(textSpan.textStyle().decoration()), + (float) textSpan.textStyle().size()); + textState.applyColor(stream, textSpan.textStyle().color()); stream.showText(text); cursorX += textSpan.width(); } else if (span instanceof ParagraphImageSpan imageSpan) { @@ -176,6 +186,10 @@ private void renderLine(PDPageContentStream stream, (float) imageBottom, (float) imageSpan.width(), (float) imageSpan.height()); + // An inline graphic runs its own graphics-state save/restore and + // colour ops; drop the tracked font/colour so the next text span + // re-emits them rather than trusting persistence across it. + textState.invalidate(); cursorX += imageSpan.width(); } else if (span instanceof ParagraphShapeSpan shapeSpan) { if (inTextBlock) { @@ -184,6 +198,7 @@ private void renderLine(PDPageContentStream stream, } renderShape(stream, shapeSpan, cursorX, baselineY, line.textAscent(), line.baselineOffsetFromBottom(), line.lineHeight()); + textState.invalidate(); cursorX += shapeSpan.width(); } } @@ -287,4 +302,39 @@ private static void renderShape(PDPageContentStream stream, } } + /** + * Tracks the font/size and non-stroking colour last written to the content + * stream within one paragraph's {@code q...Q} block, so the handler emits a + * {@code Tf}/{@code rg} operator only when a span actually changes them. The + * common single-style paragraph then carries one of each instead of one per + * span. {@link #invalidate()} forces a re-emit after anything that may disturb + * the persisted text state (inline images, shapes). + */ + private static final class TextRenderState { + private PDFont font; + private float size = Float.NaN; + private Color color; + + void applyFont(PDPageContentStream stream, PDFont newFont, float newSize) throws IOException { + if (newFont != font || newSize != size) { + stream.setFont(newFont, newSize); + font = newFont; + size = newSize; + } + } + + void applyColor(PDPageContentStream stream, Color newColor) throws IOException { + if (!newColor.equals(color)) { + stream.setNonStrokingColor(newColor); + color = newColor; + } + } + + void invalidate() { + font = null; + size = Float.NaN; + color = null; + } + } + } diff --git a/src/test/java/com/demcha/compose/document/backend/fixed/pdf/ParagraphTextStateDedupTest.java b/src/test/java/com/demcha/compose/document/backend/fixed/pdf/ParagraphTextStateDedupTest.java new file mode 100644 index 000000000..64efa399f --- /dev/null +++ b/src/test/java/com/demcha/compose/document/backend/fixed/pdf/ParagraphTextStateDedupTest.java @@ -0,0 +1,95 @@ +package com.demcha.compose.document.backend.fixed.pdf; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.demcha.compose.GraphCompose; +import com.demcha.compose.document.api.DocumentSession; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.pdfparser.PDFStreamParser; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.List; + +/** + * Guards Finding 5: the paragraph render handler tracks the last-written font and + * non-stroking colour, so a single-style paragraph that wraps into many spans + * emits one {@code setFont} ({@code Tf}) operator for the whole paragraph + * instead of one per span. + * + *

Renders a real one-page document and inspects the page content stream through + * the established {@link PDFStreamParser} token pattern — the proof lives entirely + * in test scope, with no instrumentation in the render handler.

+ */ +class ParagraphTextStateDedupTest { + + @Test + void singleStyleParagraphEmitsOneFontOperatorAcrossManySpans() throws Exception { + byte[] pdf; + try (DocumentSession session = GraphCompose.document() + .pageSize(400, 800) + .margin(24, 24, 24, 24) + .create()) { + // One uniform style; long enough to wrap into many lines/spans on a + // single page so the dedup is meaningful (without the guard this emits + // a Tf per span). + String body = ("GraphCompose lays out structured documents across pages " + + "while keeping headers and footers stable. ").repeat(8); + session.pageFlow(flow -> flow.addParagraph(p -> p.text(body))); + pdf = session.toPdfBytes(); + } + + try (PDDocument document = Loader.loadPDF(pdf)) { + assertThat(document.getNumberOfPages()) + .describedAs("body is sized to stay on one page so one q...Q block covers every span") + .isEqualTo(1); + int fontOps = operatorCount(document, "Tf"); + int textDraws = operatorCount(document, "Tj") + operatorCount(document, "TJ"); + + assertThat(textDraws) + .describedAs("the paragraph must wrap into several drawn spans for the dedup to be meaningful") + .isGreaterThanOrEqualTo(2); + assertThat(fontOps) + .describedAs("one setFont for the whole single-style paragraph, not one per span") + .isEqualTo(1); + } + } + + @Test + void multiStyleParagraphReEmitsFontOnEachStyleChange() throws Exception { + byte[] pdf; + try (DocumentSession session = GraphCompose.document() + .pageSize(400, 800) + .margin(24, 24, 24, 24) + .create()) { + // Three consecutive runs with distinct decorations (regular / bold / + // regular) on one line: the tracker must re-emit Tf at each change, + // not over-dedup them into a single setFont (which would draw the bold + // run in the regular font). + session.pageFlow(flow -> flow.addParagraph(p -> + p.rich(r -> r.plain("alpha ").bold("bravo ").plain("charlie")))); + pdf = session.toPdfBytes(); + } + + try (PDDocument document = Loader.loadPDF(pdf)) { + assertThat(operatorCount(document, "Tf")) + .describedAs("a style change within a paragraph must re-emit setFont (single-style baseline is 1)") + .isGreaterThanOrEqualTo(2); + } + } + + private static int operatorCount(PDDocument document, String operatorName) throws IOException { + int count = 0; + for (var page : document.getPages()) { + List tokens = new PDFStreamParser(page).parse(); + for (Object token : tokens) { + if (token instanceof Operator operator && operatorName.equals(operator.getName())) { + count++; + } + } + } + return count; + } +} From 6f2f7c99bf0d4c52d064189709a552cc7fe8d420 Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Tue, 9 Jun 2026 00:18:20 +0100 Subject: [PATCH 2/2] perf(layout): sanitize table cell lines once per cell, not three times Resolving a table ran each cell's lines through sanitizeCellLines separately in the natural-width, natural-height and resolve passes, rebuilding the list and its per-line control-character cleanup up to three times per cell. Compute the sanitized lines once when the logical grid is built (LogicalCell.sanitizedLines) and reuse them across all three passes. Output is byte-identical (sanitization is deterministic); on a large table this removes the dominant per-cell layout allocation. Covered by the existing table snapshot/visual tests. Finding 6. --- CHANGELOG.md | 9 +++++++ .../document/layout/TableLayoutSupport.java | 27 +++++++++++-------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b2d6c10b..790e96a75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,15 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge. redundant); pinned by the visual-regression suite plus a content-stream test asserting one `Tf` across many drawn spans. No public API or behaviour change. +- **Table cell text is sanitized once per cell instead of three times.** Resolving + a table ran each cell's lines through `sanitizeCellLines` separately in the + natural-width, natural-height and resolve passes, rebuilding the list and its + per-line control-character cleanup up to three times per cell. The sanitized + lines are now computed once when the logical grid is built and reused by all + three passes. **Output is byte-identical** (sanitization is deterministic); on a + large table this removes the dominant per-cell layout allocation. No public API + or behaviour change. + ### Tests / tooling - **Benchmark regression gate and measurement probe (benchmarks module, not part diff --git a/src/main/java/com/demcha/compose/document/layout/TableLayoutSupport.java b/src/main/java/com/demcha/compose/document/layout/TableLayoutSupport.java index 5ec6494f3..97122527b 100644 --- a/src/main/java/com/demcha/compose/document/layout/TableLayoutSupport.java +++ b/src/main/java/com/demcha/compose/document/layout/TableLayoutSupport.java @@ -95,10 +95,14 @@ record ResolvedTableLayout( * are skipped when iterating later source rows. {@code source} is the * original public {@link DocumentTableCell}, retained so the layout * can detect composed-content cells via - * {@link DocumentTableCell#hasComposedContent()}. + * {@link DocumentTableCell#hasComposedContent()}. {@code sanitizedLines} is + * the cell's text with control characters cleaned, computed once here and + * reused by the width, height and resolve passes instead of re-sanitizing the + * content three times. */ private record LogicalCell(int startRow, int startColumn, int colSpan, int rowSpan, - TableCellContent content, DocumentTableCell source) { + TableCellContent content, DocumentTableCell source, + List sanitizedLines) { } /** @@ -207,7 +211,7 @@ static ResolvedTableLayoutWithContents resolveTableLayout(TableNode node, width, height, yOffset, - sanitizeCellLines(logical.content()), + logical.sanitizedLines(), style, fillInsets(stylesGrid, rowIndex, logical.startColumn(), logical.colSpan()), borderSides(stylesGrid, rowIndex, logical.startColumn(), logical.colSpan()))); @@ -298,7 +302,7 @@ private static double naturalCellHeight(LogicalCell logical, Padding padding = style.padding() == null ? Padding.zero() : style.padding(); return prepared.measureResult().height() + padding.vertical(); } - return cellNaturalHeight(logical.content(), style, measurement); + return cellNaturalHeight(logical.sanitizedLines(), style, measurement); } static PreparedNode sliceTablePreparedNode(TableNode source, @@ -510,8 +514,9 @@ private static List> buildLogicalRows(TableNode node, int colu occupied[r][c] = true; } } + TableCellContent content = toTableCell(cell); logical.add(new LogicalCell(rowIndex, col, cell.colSpan(), cell.rowSpan(), - toTableCell(cell), cell)); + content, cell, sanitizeCellLines(content))); col += cell.colSpan(); } if (sourceIdx < source.size()) { @@ -572,7 +577,7 @@ private static double[] resolveNaturalColumnWidths(TableNode node, } int col = logical.startColumn(); singleCellRequired[col] = Math.max(singleCellRequired[col], - cellNaturalWidth(logical.content(), stylesGrid[rowIndex][col], measurement)); + cellNaturalWidth(logical.sanitizedLines(), stylesGrid[rowIndex][col], measurement)); } } @@ -596,7 +601,7 @@ private static double[] resolveNaturalColumnWidths(TableNode node, } int startCol = logical.startColumn(); int endCol = startCol + logical.colSpan(); - double need = cellNaturalWidth(logical.content(), stylesGrid[rowIndex][startCol], measurement); + double need = cellNaturalWidth(logical.sanitizedLines(), stylesGrid[rowIndex][startCol], measurement); double have = sumRange(widths, startCol, endCol); if (need <= have + EPS) { continue; @@ -669,22 +674,22 @@ private static double[] resolveFinalColumnWidths(TableNode node, return finalWidths; } - private static double cellNaturalWidth(TableCellContent cell, + private static double cellNaturalWidth(List sanitizedLines, TableCellLayoutStyle style, TextMeasurementSystem measurement) { Padding padding = style.padding() == null ? Padding.zero() : style.padding(); double maxWidth = 0.0; - for (String line : sanitizeCellLines(cell)) { + for (String line : sanitizedLines) { maxWidth = Math.max(maxWidth, measurement.textWidth(style.textStyle(), line)); } return maxWidth + padding.horizontal(); } - private static double cellNaturalHeight(TableCellContent cell, + private static double cellNaturalHeight(List sanitizedLines, TableCellLayoutStyle style, TextMeasurementSystem measurement) { Padding padding = style.padding() == null ? Padding.zero() : style.padding(); - int lineCount = Math.max(1, sanitizeCellLines(cell).size()); + int lineCount = Math.max(1, sanitizedLines.size()); return (lineCount * measurement.lineHeight(style.textStyle())) + ((lineCount - 1) * tableCellLineSpacing(style)) + padding.vertical();