From 776ef7caeb853f7c7f0a27b128234f3f773a0a2b Mon Sep 17 00:00:00 2001 From: Joseph Ottinger Date: Mon, 8 Jun 2026 07:43:10 -0400 Subject: [PATCH 1/2] feature: add hebrew, arabic, hungarian text * This is a proof of a feature, not a change. It validates that GraphCompose can render Hebrew, Arabic, and Hungarian texts properly in a PDF by adding tests. Closes #140 --- .../presets/EngineeringResumeSmokeTest.java | 143 ++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/src/test/java/com/demcha/compose/document/templates/cv/v2/presets/EngineeringResumeSmokeTest.java b/src/test/java/com/demcha/compose/document/templates/cv/v2/presets/EngineeringResumeSmokeTest.java index ca181216a..8997c3f38 100644 --- a/src/test/java/com/demcha/compose/document/templates/cv/v2/presets/EngineeringResumeSmokeTest.java +++ b/src/test/java/com/demcha/compose/document/templates/cv/v2/presets/EngineeringResumeSmokeTest.java @@ -12,9 +12,21 @@ import com.demcha.compose.document.templates.cv.v2.data.RowsSection; import com.demcha.compose.document.templates.cv.v2.data.SkillsSection; import com.demcha.compose.document.templates.cv.v2.theme.CvTheme; +import com.demcha.compose.document.templates.cv.v2.theme.CvTypography; +import com.demcha.compose.font.FontFamilyDefinition; +import com.demcha.compose.font.FontName; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; import org.junit.jupiter.api.Test; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assumptions.assumeTrue; /** * Smoke test for the v2 Engineering Resume preset. Covers the navy @@ -24,6 +36,17 @@ */ class EngineeringResumeSmokeTest { + // this flag exists to allow us to emit the PDF if we want to examine it visually. 
+ private static final boolean SAVE_MULTILINGUAL_PDF = true; + private static final FontName MULTISCRIPT_FONT = FontName.of("Test Multiscript"); + // Hungarian sample: classic accented-Latin unicode test phrase, + // often glossed as "floodproof mirror-drilling machine". + private static final String HUNGARIAN_TEXT = "Árvíztűrő tükörfúrógép"; + // Hebrew sample: "shalom olam" / "hello world". + private static final String HEBREW_TEXT = "שלום עולם"; + // Arabic sample: "marhaban bil-alam" / "hello world". + private static final String ARABIC_TEXT = "مرحبا بالعالم"; + @Test void exposes_stable_identity() { DocumentTemplate template = EngineeringResume.create(); @@ -42,6 +65,46 @@ void custom_theme_factory_renders() throws Exception { fullDocument()); } + @Test + void custom_multiscript_theme_renders_hungarian_hebrew_and_arabic_to_pdf() throws Exception { + Path fontPath = resolveMultiscriptFont(); + assumeTrue(fontPath != null, "requires a local font with Hebrew + Arabic + Latin coverage"); + + DocumentTemplate template = EngineeringResume.create(multiscriptTheme()); + + try (DocumentSession session = GraphCompose.document() + .pageSize(420, 595) + .margin(DocumentInsets.of(20)) + .create()) { + session.registerFontFamily(FontFamilyDefinition.files(MULTISCRIPT_FONT, fontPath).build()); + template.compose(session, multilingualDocument()); + + byte[] pdfBytes = session.toPdfBytes(); + maybeWriteMultilingualPdf(pdfBytes); + assertThat(pdfBytes).hasSizeGreaterThan(500); + assertThat(new String(pdfBytes, 0, 5, StandardCharsets.US_ASCII)).isEqualTo("%PDF-"); + + try (PDDocument document = Loader.loadPDF(pdfBytes)) { + assertThat(document.getNumberOfPages()).isGreaterThan(0); + + String extracted = new PDFTextStripper().getText(document); + // Hungarian is the LTR Unicode control in this test: it proves + // the preset can carry non-English text through a real resume + // render without introducing RTL ordering/shaping ambiguity. 
+ assertThat(extracted).contains(HUNGARIAN_TEXT); + + // We intentionally do not assert exact PDFTextStripper output + // for Hebrew or Arabic. PDF text extraction is heuristic and + // may reorder or normalize RTL runs independently of whether + // the renderer successfully painted the correct glyphs. + // + // The thing being tested here is resume-template PDF generation + // with a font that genuinely covers Hebrew + Arabic + Latin, + // not PDFBox's exact bidi extraction behavior. + } + } + } + private static void renderAndAssertNonEmpty( DocumentTemplate template, CvDocument doc) throws Exception { @@ -90,4 +153,84 @@ private static CvDocument fullDocument() { .build()) .build(); } + + private static CvDocument multilingualDocument() { + return CvDocument.builder() + .identity(CvIdentity.builder() + .name("János", "Farkas") + .jobTitle("Senior Platform Engineer") + .contact("+36 30 555 0101", "janos@example.dev", "Budapest") + .link("LinkedIn", "https://linkedin.com/in/janos-farkas") + .link("GitHub", "https://github.com/janos") + .build()) + .sections( + new ParagraphSection("Professional Summary", + "Builds reliable multilingual document pipelines. 
Hungarian sample: " + + HUNGARIAN_TEXT), + SkillsSection.builder("Technical Skills") + .group("Languages", "Java 21", "Kotlin", "SQL") + .group("Internationalisation", HEBREW_TEXT, ARABIC_TEXT) + .build(), + EntriesSection.builder("Education & Certifications") + .entry("MSc Computer Science", + "Budapest University", + "2019-2021", + "Focused on document systems.") + .build(), + RowsSection.builder("Projects", RowStyle.BULLETED_STACKED) + .row("GraphCompose", + "Resume PDF generation with multilingual content.") + .build(), + EntriesSection.builder("Professional Experience") + .entry("Senior Platform Engineer", "Acme", + "2021-2024", + "Shipped CV rendering and localization workflows.") + .build(), + RowsSection.builder("Additional Information", RowStyle.PLAIN) + .row("Hebrew sample", HEBREW_TEXT) + .row("Arabic sample", ARABIC_TEXT) + .row("Hungarian sample", HUNGARIAN_TEXT) + .build()) + .build(); + } + + private static CvTheme multiscriptTheme() { + CvTheme base = CvTheme.engineeringResume(); + CvTypography typography = new CvTypography( + MULTISCRIPT_FONT, + MULTISCRIPT_FONT, + base.typography().sizeHeadline(), + base.typography().sizeContact(), + base.typography().sizeBanner(), + base.typography().sizeEntryTitle(), + base.typography().sizeEntryDate(), + base.typography().sizeEntrySubtitle(), + base.typography().sizeBody(), + base.typography().bodyLineSpacing()); + return new CvTheme(base.palette(), typography, base.spacing(), base.decoration()); + } + + private static Path resolveMultiscriptFont() { + List<Path> candidates = List.of( + Path.of("/Library/Fonts/Arial Unicode.ttf"), + Path.of("/System/Library/Fonts/Supplemental/Arial Unicode.ttf"), + Path.of("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"), + Path.of("/usr/share/fonts/truetype/freefont/FreeSans.ttf")); + + for (Path candidate : candidates) { + if (Files.isRegularFile(candidate)) { + return candidate; + } + } + return null; + } + + private static void maybeWriteMultilingualPdf(byte[] pdfBytes) throws 
Exception { + if (!SAVE_MULTILINGUAL_PDF) { + return; + } + Path output = Path.of("target", "visual-tests", "cv-v2", "engineering-resume-multilingual.pdf"); + Files.createDirectories(output.getParent()); + Files.write(output, pdfBytes); + } } From ebca03cf8b8fa586f242eec937d30191bca19186 Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Mon, 8 Jun 2026 14:01:51 +0100 Subject: [PATCH 2/2] test(rtl): make multiscript font lookup cross-platform (add Windows font paths) --- .../cv/v2/presets/EngineeringResumeSmokeTest.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/test/java/com/demcha/compose/document/templates/cv/v2/presets/EngineeringResumeSmokeTest.java b/src/test/java/com/demcha/compose/document/templates/cv/v2/presets/EngineeringResumeSmokeTest.java index 8997c3f38..facc8e1bb 100644 --- a/src/test/java/com/demcha/compose/document/templates/cv/v2/presets/EngineeringResumeSmokeTest.java +++ b/src/test/java/com/demcha/compose/document/templates/cv/v2/presets/EngineeringResumeSmokeTest.java @@ -4,13 +4,7 @@ import com.demcha.compose.document.api.DocumentSession; import com.demcha.compose.document.style.DocumentInsets; import com.demcha.compose.document.templates.api.DocumentTemplate; -import com.demcha.compose.document.templates.cv.v2.data.CvDocument; -import com.demcha.compose.document.templates.cv.v2.data.CvIdentity; -import com.demcha.compose.document.templates.cv.v2.data.EntriesSection; -import com.demcha.compose.document.templates.cv.v2.data.ParagraphSection; -import com.demcha.compose.document.templates.cv.v2.data.RowStyle; -import com.demcha.compose.document.templates.cv.v2.data.RowsSection; -import com.demcha.compose.document.templates.cv.v2.data.SkillsSection; +import com.demcha.compose.document.templates.cv.v2.data.*; import com.demcha.compose.document.templates.cv.v2.theme.CvTheme; import com.demcha.compose.document.templates.cv.v2.theme.CvTypography; import com.demcha.compose.font.FontFamilyDefinition; @@ -215,7 
+209,11 @@ private static Path resolveMultiscriptFont() { Path.of("/Library/Fonts/Arial Unicode.ttf"), Path.of("/System/Library/Fonts/Supplemental/Arial Unicode.ttf"), Path.of("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"), - Path.of("/usr/share/fonts/truetype/freefont/FreeSans.ttf")); + Path.of("/usr/share/fonts/truetype/freefont/FreeSans.ttf"), + // Windows: Tahoma / Arial / Segoe UI all cover Latin + Hebrew + Arabic. + Path.of("C:/Windows/Fonts/tahoma.ttf"), + Path.of("C:/Windows/Fonts/arial.ttf"), + Path.of("C:/Windows/Fonts/segoeui.ttf")); for (Path candidate : candidates) { if (Files.isRegularFile(candidate)) {