DemchaAV · DemchaAV · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026
@@ -337,6 +337,35 @@ Entries land here as they merge.
 
 ### Internal
 
+- **Benchmark suite cleanup (not shipped).** Removed three redundant
+  benchmark mains: `FullCvBenchmark` (superseded by the JMH
+  `TemplateCvJmhBenchmark`), `GraphComposeBenchmark` (early-engine relic
+  duplicating `CurrentSpeedBenchmark`'s `engine-simple` scenario), and
+  `ScalabilityBenchmark` (its thread-scaling sweep folded into
+  `CurrentSpeedBenchmark`'s full-profile throughput run, now `1,2,4,8,16`).
+  Dropped the matching `run-benchmarks.ps1` steps and doc entries.
+- **Feature-object benchmarks for the v1.8 vector surface (not shipped).**
+  The suite previously exercised only text/table primitives. Added JMH render
+  benches and deterministic probes over the new vector features:
+  `SvgJmhBenchmark` (path parse / whole-file icon read / icon→node) plus a
+  `SvgParseAllocProbe`; `ChartJmhBenchmark` (bar + line + pie render) plus a
+  `ChartAllocProbe` (layout-compile allocation); `VectorRenderOperatorProbe`
+  (the same paths drawn flat vs. gradient vs. translucent, counted as PDF
+  content-stream operators); `IconRampJmhBenchmark` (icon-placement scaling,
+  `@Param` 8/32/128); and `MixedShowcaseJmhBenchmark` (one document combining
+  prose, inline sparklines, bar + pie charts, SVG icons and a gradient path).
+  Shared `SvgBenchmarkFixtures` / `ChartBenchmarkFixtures` hold the inputs so
+  each bench and its probe measure identical data.
+- **Current-speed report carries a stage breakdown and a run summary (not
+  shipped).** `CurrentSpeedBenchmark` persists a per-scenario compose / layout /
+  render split (`stages[]`, median ms) to the JSON and a `stages` CSV, and
+  writes a readable `summary.md`. `BenchmarkDiffTool` consumes `stages[]`,
+  prints a per-stage delta table, and reports the scenarios added/removed
+  between two runs.
+- **Every current-speed scenario is now covered by the smoke perf gate (not
+  shipped).** The `long-token` scenario previously had no SMOKE threshold and
+  silently escaped the gate; it now has one, and `CurrentSpeedScenarioGateTest`
+  fails the build if any scenario lacks a threshold.
 - **Removed the `java.awt.*` / `java.util.*` co-wildcard in four files.**
   `InvoiceTemplateComposer`, `ProposalTemplateComposer`,
   `WeeklyScheduleTemplateComposer`, and the engine `PdfRenderingSystemECS`

@@ -63,13 +63,11 @@
 |---|---|
 | `CurrentSpeedBenchmark` | Default scenario runner — what CI's `perf-smoke` job exercises. Takes a `-Dgraphcompose.benchmark.profile=smoke\|full\|stress` switch. |
 | `ComparativeBenchmark` | Renders the same fixtures through GraphCompose, iText, openHTMLToPDF, JasperReports. **Rough local comparison only** — see "When not to use" above. |
-| `FullCvBenchmark`, `ScalabilityBenchmark` | Fixture-specific runners for CV and table-heavy scenarios. |
 | `CanonicalBenchmarkSupport`, `BenchmarkSupport` | Shared fixture builders + measurement helpers. |
 | `BenchmarkReportWriter` | Writes JSON / CSV / text reports under `benchmarks/target/benchmarks/`. |
 | `BenchmarkDiffTool` | Compares two JSON reports and prints a delta table. Useful for pre/post comparisons. |
 | `BenchmarkMedianTool` | Median + dispersion across N runs of the same scenario. |
 | `GraphComposeStressTest`, `EnduranceTest` | Long-running stress / endurance harnesses. |
-| `GraphComposeBenchmark` | Legacy entry point preserved for one downstream caller. New work should target `CurrentSpeedBenchmark`. |
 
 ## Running
 

@@ -93,6 +93,31 @@ private void diffCurrentSpeed(DiffInput input,
                     signedPercent(row.peakHeapMbDeltaPct()));
         }
 
+        if (!report.addedScenarios().isEmpty() || !report.removedScenarios().isEmpty()) {
+            System.out.println();
+            System.out.println("Scenario set changes");
+            System.out.println("  Added in candidate:    "
+                    + (report.addedScenarios().isEmpty() ? "(none)" : String.join(", ", report.addedScenarios())));
+            System.out.println("  Removed from baseline: "
+                    + (report.removedScenarios().isEmpty() ? "(none)" : String.join(", ", report.removedScenarios())));
+        }
+
+        if (!report.stages().isEmpty()) {
+            System.out.println();
+            System.out.println("Stage diff (pct delta per stage)");
+            System.out.printf("%-18s | %12s | %12s | %12s | %12s%n",
+                    "Scenario", "Compose pct", "Layout pct", "Render pct", "Total pct");
+            System.out.println("-".repeat(78));
+            for (StageDiff row : report.stages()) {
+                System.out.printf("%-18s | %12s | %12s | %12s | %12s%n",
+                        row.scenario(),
+                        signedPercent(row.composeDeltaPct()),
+                        signedPercent(row.layoutDeltaPct()),
+                        signedPercent(row.renderDeltaPct()),
+                        signedPercent(row.totalDeltaPct()));
+            }
+        }
+
         System.out.println();
         System.out.println("Throughput diff");
         System.out.printf("%-18s | %8s | %12s | %14s%n",
@@ -143,10 +168,29 @@ private void diffCurrentSpeed(DiffInput input,
                                 format(row.candidateAvgMillisPerDoc()),
                                 format(row.avgMillisPerDocDeltaPct())))
                         .toList());
+        Path stagesCsv = artifacts.writeCsv(
+                "stages-diff",
+                List.of("scenario", "baseline_compose_ms", "candidate_compose_ms", "compose_delta_pct", "baseline_layout_ms", "candidate_layout_ms", "layout_delta_pct", "baseline_render_ms", "candidate_render_ms", "render_delta_pct", "baseline_total_ms", "candidate_total_ms", "total_delta_pct"),
+                report.stages().stream()
+                        .map(row -> List.of(
+                                row.scenario(),
+                                format(row.baselineComposeMillis()),
+                                format(row.candidateComposeMillis()),
+                                format(row.composeDeltaPct()),
+                                format(row.baselineLayoutMillis()),
+                                format(row.candidateLayoutMillis()),
+                                format(row.layoutDeltaPct()),
+                                format(row.baselineRenderMillis()),
+                                format(row.candidateRenderMillis()),
+                                format(row.renderDeltaPct()),
+                                format(row.baselineTotalMillis()),
+                                format(row.candidateTotalMillis()),
+                                format(row.totalDeltaPct())))
+                        .toList());
 
         System.out.println();
         System.out.println("Saved JSON diff report to " + jsonPath);
-        System.out.println("Saved CSV diff reports to " + latencyCsv + " and " + throughputCsv);
+        System.out.println("Saved CSV diff reports to " + latencyCsv + ", " + throughputCsv + ", and " + stagesCsv);
     }
 
     private void diffComparative(DiffInput input,
@@ -214,6 +258,29 @@ private CurrentSpeedDiffReport buildCurrentSpeedDiff(DiffInput input, JsonNode b
                 })
                 .toList();
 
+        Map<String, JsonNode> baselineStages = indexBy(baseline.path("stages"), "scenario");
+        Map<String, JsonNode> candidateStages = indexBy(candidate.path("stages"), "scenario");
+        List<StageDiff> stageDiffs = intersectKeys(baselineStages, candidateStages).stream()
+                .map(key -> {
+                    JsonNode before = baselineStages.get(key);
+                    JsonNode after = candidateStages.get(key);
+                    return new StageDiff(
+                            key,
+                            before.path("composeMillis").asDouble(),
+                            after.path("composeMillis").asDouble(),
+                            percentDelta(before.path("composeMillis").asDouble(), after.path("composeMillis").asDouble()),
+                            before.path("layoutMillis").asDouble(),
+                            after.path("layoutMillis").asDouble(),
+                            percentDelta(before.path("layoutMillis").asDouble(), after.path("layoutMillis").asDouble()),
+                            before.path("renderMillis").asDouble(),
+                            after.path("renderMillis").asDouble(),
+                            percentDelta(before.path("renderMillis").asDouble(), after.path("renderMillis").asDouble()),
+                            before.path("totalMillis").asDouble(),
+                            after.path("totalMillis").asDouble(),
+                            percentDelta(before.path("totalMillis").asDouble(), after.path("totalMillis").asDouble()));
+                })
+                .toList();
+
         Map<String, JsonNode> baselineThroughput = indexThroughput(baseline.path("throughput"));
         Map<String, JsonNode> candidateThroughput = indexThroughput(candidate.path("throughput"));
         List<CurrentSpeedThroughputDiff> throughputDiffs = intersectKeys(baselineThroughput, candidateThroughput).stream()
@@ -237,7 +304,10 @@ private CurrentSpeedDiffReport buildCurrentSpeedDiff(DiffInput input, JsonNode b
                 input.candidatePath().toString(),
                 baseline.path("timestamp").asText(),
                 candidate.path("timestamp").asText(),
+                addedKeys(baselineLatency, candidateLatency),
+                removedKeys(baselineLatency, candidateLatency),
                 latencyDiffs,
+                stageDiffs,
                 throughputDiffs
         );
     }
@@ -294,6 +364,16 @@ private static List<String> intersectKeys(Map<String, JsonNode> left, Map<String
                 .toList();
     }
 
+    /**
+     * Lists the scenarios that are new in the candidate run.
+     *
+     * @param baseline  entries of the baseline report, keyed by scenario
+     * @param candidate entries of the candidate report, keyed by scenario
+     * @return the keys of {@code candidate} that {@code baseline} does not contain,
+     *         in natural (sorted) order
+     */
+    private static List<String> addedKeys(Map<String, JsonNode> baseline, Map<String, JsonNode> candidate) {
+        return candidate.keySet()
+                .stream()
+                .filter(scenario -> !baseline.containsKey(scenario))
+                .sorted()
+                .toList();
+    }
+
+    /**
+     * Lists the scenarios that were dropped by the candidate run.
+     *
+     * @param baseline  entries of the baseline report, keyed by scenario
+     * @param candidate entries of the candidate report, keyed by scenario
+     * @return the keys of {@code baseline} that {@code candidate} does not contain,
+     *         in natural (sorted) order
+     */
+    private static List<String> removedKeys(Map<String, JsonNode> baseline, Map<String, JsonNode> candidate) {
+        // "Removed" is "added" viewed from the other side: swap the two maps.
+        return addedKeys(candidate, baseline);
+    }
+
     private static Iterable<JsonNode> iterable(JsonNode array) {
         return () -> new Iterator<>() {
             private final Iterator<JsonNode> delegate = array.iterator();
@@ -477,11 +557,29 @@ private record CurrentSpeedThroughputDiff(String scenario,
                                               double avgMillisPerDocDeltaPct) {
     }
 
+    /**
+     * One row of the per-stage comparison between a baseline and a candidate
+     * report. Each stage (compose, layout, render, total) carries the baseline
+     * value, the candidate value, and the percentage delta between the two.
+     *
+     * @param scenario               scenario name shared by both reports
+     * @param baselineComposeMillis  baseline {@code composeMillis}
+     * @param candidateComposeMillis candidate {@code composeMillis}
+     * @param composeDeltaPct        percentage delta of the compose stage
+     * @param baselineLayoutMillis   baseline {@code layoutMillis}
+     * @param candidateLayoutMillis  candidate {@code layoutMillis}
+     * @param layoutDeltaPct         percentage delta of the layout stage
+     * @param baselineRenderMillis   baseline {@code renderMillis}
+     * @param candidateRenderMillis  candidate {@code renderMillis}
+     * @param renderDeltaPct         percentage delta of the render stage
+     * @param baselineTotalMillis    baseline {@code totalMillis}
+     * @param candidateTotalMillis   candidate {@code totalMillis}
+     * @param totalDeltaPct          percentage delta of the total
+     */
+    private record StageDiff(String scenario,
+                             double baselineComposeMillis,
+                             double candidateComposeMillis,
+                             double composeDeltaPct,
+                             double baselineLayoutMillis,
+                             double candidateLayoutMillis,
+                             double layoutDeltaPct,
+                             double baselineRenderMillis,
+                             double candidateRenderMillis,
+                             double renderDeltaPct,
+                             double baselineTotalMillis,
+                             double candidateTotalMillis,
+                             double totalDeltaPct) {
+    }
+
+    /**
+     * Complete current-speed comparison between a baseline and a candidate report.
+     *
+     * @param baselinePath       path of the baseline report, as text
+     * @param candidatePath      path of the candidate report, as text
+     * @param baselineTimestamp  {@code timestamp} field read from the baseline report
+     * @param candidateTimestamp {@code timestamp} field read from the candidate report
+     * @param addedScenarios     scenario names present only in the candidate
+     * @param removedScenarios   scenario names present only in the baseline
+     * @param latency            per-scenario latency rows
+     * @param stages             per-scenario compose / layout / render / total rows
+     * @param throughput         throughput rows
+     */
     private record CurrentSpeedDiffReport(String baselinePath,
                                           String candidatePath,
                                           String baselineTimestamp,
                                           String candidateTimestamp,
+                                          List<String> addedScenarios,
+                                          List<String> removedScenarios,
                                           List<CurrentSpeedLatencyDiff> latency,
+                                          List<StageDiff> stages,
                                           List<CurrentSpeedThroughputDiff> throughput) {
     }
 

@@ -24,6 +24,11 @@
  * possible, so it can be diffed by {@link BenchmarkDiffTool}. The tool is meant
  * for local benchmark sessions where a few repeated runs are needed to reduce
  * machine noise before comparing results.</p>
+ *
+ * <p>The current-speed per-stage breakdown ({@code stages[]}) is <em>not</em>
+ * carried into the median aggregate — medians are computed only for latency
+ * and throughput. A median-vs-median diff therefore shows no compose/layout/render
+ * stage deltas; diff a single-run pair when you need stage attribution.</p>
  */
 public final class BenchmarkMedianTool {
 

@@ -60,6 +60,14 @@ Path writeCsv(String tableName, List<String> headers, List<List<String>> rows) t
             return archived;
         }
 
+        /**
+         * Writes {@code content} as UTF-8 to two Markdown files under
+         * {@code directory}: {@code latest-<name>.md} first, then
+         * {@code <name>-<timestamp>.md}.
+         *
+         * @param name    base name used to build both file names
+         * @param content Markdown text to write
+         * @return the path of the timestamped copy
+         * @throws IOException if either file cannot be written
+         */
+        Path writeMarkdown(String name, String content) throws IOException {
+            Path archived = directory.resolve(name + "-" + timestamp + ".md");
+            Path latest = directory.resolve("latest-" + name + ".md");
+            // Same payload goes to both targets; "latest" is written before the archive copy.
+            for (Path target : List.of(latest, archived)) {
+                Files.writeString(target, content, StandardCharsets.UTF_8);
+            }
+            return archived;
+        }
+
+        /** Returns the {@code directory} field, against which the write methods resolve their output paths. */
         Path directory() {
             return directory;
         }

@@ -0,0 +1,114 @@
+package com.demcha.compose;
+
+import com.demcha.compose.document.api.DocumentPageSize;
+import com.demcha.compose.document.api.DocumentSession;
+import com.demcha.compose.document.backend.fixed.pdf.PdfMeasurementResources;
+import com.demcha.compose.document.layout.DocumentGraph;
+import com.demcha.compose.document.layout.DocumentLayoutPassContext;
+import com.demcha.compose.document.layout.LayoutCanvas;
+import com.demcha.compose.document.layout.LayoutCompiler;
+import com.demcha.compose.document.layout.LayoutGraph;
+import com.demcha.compose.document.layout.NodeRegistry;
+import com.demcha.compose.document.node.DocumentNode;
+
+import java.lang.management.ManagementFactory;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Deterministic allocation probe for the v1.8 chart subsystem: warm
+ * (JIT-steady) bytes allocated by the layout-compile pass of a chart-heavy
+ * document (a grouped bar, a multi-series line, and a pie). Charts are resolved
+ * into engine primitives during compile, so this isolates the chart-resolve +
+ * geometry-emission allocation — the noise-free signal a develop-vs-branch A/B
+ * needs. No {@code src/main} changes.
+ *
+ * @author Artem Demchyshyn
+ */
+public final class ChartAllocProbe {
+
+    /**
+     * Allocation-aware thread bean, or {@code null} when the platform bean does not
+     * implement {@code com.sun.management.ThreadMXBean}. Resolved with a guarded
+     * cast so an incompatible JVM degrades to "n/a" output instead of failing class
+     * initialization with a {@link ClassCastException}.
+     */
+    private static final com.sun.management.ThreadMXBean THREAD_MX = resolveThreadMxBean();
+
+    /** Number of unmeasured compile passes run before sampling. */
+    private static final int WARMUP = 60;
+    /** Number of measured compile passes; the median sample is reported. */
+    private static final int MEASURE = 11;
+
+    private ChartAllocProbe() {
+        // Entry-point holder; not meant to be instantiated.
+    }
+
+    /**
+     * Builds a three-chart document, warms up the layout compiler, then prints the
+     * median / min / max bytes allocated by the current thread per compile pass.
+     *
+     * @param args ignored
+     * @throws Exception if the session or measurement resources fail to open or close
+     */
+    public static void main(String[] args) throws Exception {
+        BenchmarkSupport.configureQuietLogging();
+        enableAllocationMeasurement();
+
+        try (DocumentSession session = GraphCompose.document()
+                .pageSize(DocumentPageSize.A4)
+                .margin(24, 24, 24, 24)
+                .create()) {
+            session.pageFlow(flow -> flow
+                    .chart(ChartBenchmarkFixtures.barSpec(), ChartBenchmarkFixtures.barStyle())
+                    .chart(ChartBenchmarkFixtures.lineSpec(), ChartBenchmarkFixtures.lineStyle())
+                    .chart(ChartBenchmarkFixtures.pieSpec()));
+
+            List<DocumentNode> roots = session.roots();
+            LayoutCanvas canvas = session.canvas();
+            NodeRegistry registry = session.registry();
+
+            try (PdfMeasurementResources resources = PdfMeasurementResources.open(List.of())) {
+                LayoutCompiler compiler = new LayoutCompiler(registry);
+                DocumentGraph graph = new DocumentGraph(roots);
+
+                int pages = 0;
+                // Warm up so the measured allocation is JIT steady state, not
+                // class-load / first-call cold start.
+                for (int i = 0; i < WARMUP; i++) {
+                    pages = compile(compiler, graph, registry, canvas, resources).totalPages();
+                }
+
+                long[] alloc = new long[MEASURE];
+                for (int m = 0; m < MEASURE; m++) {
+                    long before = currentThreadAllocatedBytes();
+                    LayoutGraph layout = compile(compiler, graph, registry, canvas, resources);
+                    // A negative "before" means measurement is unavailable; record -1 so kb() prints n/a.
+                    alloc[m] = before < 0 ? -1 : currentThreadAllocatedBytes() - before;
+                    pages = layout.totalPages();
+                }
+                Arrays.sort(alloc);
+
+                System.out.println("GraphCompose chart layout-compile allocation probe");
+                System.out.printf("document: grouped bar + line (12 cats x 3 series) + 6-slice pie, pages: %d%n", pages);
+                System.out.printf("warm compile allocation (median of %d): %s%n",
+                        MEASURE, kb(alloc[MEASURE / 2]));
+                System.out.printf("  min %s / max %s%n", kb(alloc[0]), kb(alloc[MEASURE - 1]));
+            }
+        }
+    }
+
+    /** Runs one layout-compile pass over {@code graph} with a freshly created pass context. */
+    private static LayoutGraph compile(LayoutCompiler compiler, DocumentGraph graph,
+                                       NodeRegistry registry, LayoutCanvas canvas,
+                                       PdfMeasurementResources resources) {
+        DocumentLayoutPassContext context = new DocumentLayoutPassContext(
+                registry, canvas, resources.fontLibrary(), resources.textMeasurementSystem(), false);
+        return compiler.compile(graph, context, context);
+    }
+
+    /** Formats a byte count as kilobytes, or an "n/a" message for a negative (unavailable) sample. */
+    private static String kb(long bytes) {
+        return bytes < 0 ? "n/a (allocation measurement unsupported)" : "%.1f KB".formatted(bytes / 1024.0);
+    }
+
+    /** Returns the platform thread bean as the allocation-aware subtype, or {@code null} if it is not one. */
+    private static com.sun.management.ThreadMXBean resolveThreadMxBean() {
+        java.lang.management.ThreadMXBean platform = ManagementFactory.getThreadMXBean();
+        return platform instanceof com.sun.management.ThreadMXBean
+                ? (com.sun.management.ThreadMXBean) platform
+                : null;
+    }
+
+    /** Turns on per-thread allocation accounting when the JVM supports it and it is currently off. */
+    private static void enableAllocationMeasurement() {
+        if (THREAD_MX == null) {
+            return;
+        }
+        try {
+            if (THREAD_MX.isThreadAllocatedMemorySupported() && !THREAD_MX.isThreadAllocatedMemoryEnabled()) {
+                THREAD_MX.setThreadAllocatedMemoryEnabled(true);
+            }
+        } catch (UnsupportedOperationException ignored) {
+            // Allocation measurement unsupported on this JVM; the probe reports n/a.
+        }
+    }
+
+    /**
+     * Returns the bytes allocated so far by the current thread, or {@code -1} when
+     * the bean is missing, or measurement is unsupported or disabled.
+     */
+    private static long currentThreadAllocatedBytes() {
+        if (THREAD_MX == null) {
+            return -1;
+        }
+        try {
+            if (!THREAD_MX.isThreadAllocatedMemorySupported() || !THREAD_MX.isThreadAllocatedMemoryEnabled()) {
+                return -1;
+            }
+        } catch (UnsupportedOperationException ex) {
+            return -1;
+        }
+        return THREAD_MX.getCurrentThreadAllocatedBytes();
+    }
+}