diff --git a/docs/agents/evals.md b/docs/agents/evals.md
index 375ed30..b181085 100644
--- a/docs/agents/evals.md
+++ b/docs/agents/evals.md
@@ -265,7 +265,11 @@ Update this section whenever active eval membership or scoring changes.
   bodies into helpers. Its high multi-line-lambda criterion weight is intentional focused
   behavior-delta coverage, not ordinary broad lift evidence.
 - Hard-stop scan audits: regression explicit workflow-use only.
-- Reference suite: 6 scenarios, 560 total checklist points. Deleted reference number 12 and
+- Reference suite: 20 scenarios, 1960 total checklist points. Reference numbers `29` through `42`
+  cover the open issue sweep for bounded duplicate lookup, findAny audits, immutable/result
+  collection boundaries, predicate loops, parser-preserving streams, collector rationale, formatting,
+  identity mappers, batched lookup phases, mapMulti extraction, tail allMatch checks, and forEach
+  side-effect classification. Deleted reference number 12 and
   regression-moved scenarios are not counted.
 - Regression suite: 19 scenarios, 1820 total checklist points.
 - Hosted benchmark evidence is pending rerun for the current active suite. Do not publish exact
diff --git a/evals-reference/29-bounded-duplicate-detection-stream/capability.txt b/evals-reference/29-bounded-duplicate-detection-stream/capability.txt
new file mode 100644
index 0000000..4a88324
--- /dev/null
+++ b/evals-reference/29-bounded-duplicate-detection-stream/capability.txt
@@ -0,0 +1,2 @@
+Refactor duplicate-aware Java stream lookups so they inspect at most two matches while preserving
+zero, one, and ambiguous-match behavior.
diff --git a/evals-reference/29-bounded-duplicate-detection-stream/criteria.json b/evals-reference/29-bounded-duplicate-detection-stream/criteria.json
new file mode 100644
index 0000000..d6087f8
--- /dev/null
+++ b/evals-reference/29-bounded-duplicate-detection-stream/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused cleanup: duplicate-aware lookup helpers should use bounded stream collection without accepting ambiguous matches.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 21 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns a complete revised ChecklistLookup.java snippet with necessary imports and Java 21-compatible code."
+    },
+    {
+      "name": "Uses bounded duplicate detection",
+      "category": "stream_quality",
+      "max_score": 28,
+      "description": "Filters matching values, limits the stream to at most two matches, materializes only those bounded matches, and branches on zero, one, or ambiguous matches."
+    },
+    {
+      "name": "Rejects findFirst shortcut",
+      "category": "stream_quality",
+      "max_score": 18,
+      "description": "Does not replace the loop with findFirst, findAny, or an equivalent first-match shortcut that would silently accept duplicate matches."
+    },
+    {
+      "name": "Shares the repeated branch carefully",
+      "category": "maintainability",
+      "max_score": 16,
+      "description": "Extracts a small generic helper for the shared zero/one/ambiguous branch, or otherwise removes meaningful duplication without hiding the domain-specific predicate, error code, or message."
+    },
+    {
+      "name": "Preserves matching behavior",
+      "category": "safety",
+      "max_score": 12,
+      "description": "Keeps Objects.equals-style null-safe matching for checklist names and item text, preserves input encounter order for the single returned match, and still returns null when no match exists."
+    },
+    {
+      "name": "Preserves exceptions",
+      "category": "safety",
+      "max_score": 10,
+      "description": "Keeps the stable TrelloException type, duplicate error codes, and duplicate messages for both helper methods."
+    },
+    {
+      "name": "Avoids over-engineering",
+      "category": "maintainability",
+      "max_score": 8,
+      "description": "Does not introduce broad lookup frameworks, caches, parallel streams, or unrelated API changes."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/53",
+    "reference_selection": "Focused issue #53 coverage for bounded duplicate-detection stream lookups.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape."
+  }
+}
diff --git a/evals-reference/29-bounded-duplicate-detection-stream/task.md b/evals-reference/29-bounded-duplicate-detection-stream/task.md
new file mode 100644
index 0000000..4c46c6b
--- /dev/null
+++ b/evals-reference/29-bounded-duplicate-detection-stream/task.md
@@ -0,0 +1,69 @@
+# Refactor duplicate-aware lookups
+
+Refactor `ChecklistLookup.java` to use a stream-based implementation. Assume Java 21.
+
+Return the revised Java code only.
+
+```java
+import java.util.List;
+import java.util.Objects;
+
+final class ChecklistLookup {
+    static Card.Checklist singleChecklistByName(List<Card.Checklist> checklists, String checklistName) {
+        Card.Checklist match = null;
+        for (Card.Checklist checklist : checklists) {
+            if (!Objects.equals(checklist.name(), checklistName)) {
+                continue;
+            }
+            if (match != null) {
+                throw new TrelloException(
+                        "trello_checklist_ambiguous",
+                        "Multiple Trello checklists match the requested checklist_name.");
+            }
+            match = checklist;
+        }
+        return match;
+    }
+
+    static Card.ChecklistItem singleCheckItemByName(Card.Checklist checklist, String itemName) {
+        Card.ChecklistItem match = null;
+        for (Card.ChecklistItem item : checklist.items()) {
+            if (!Objects.equals(item.text(), itemName)) {
+                continue;
+            }
+            if (match != null) {
+                throw new TrelloException(
+                        "trello_check_item_ambiguous",
+                        "Multiple Trello checklist items match the requested item_name.");
+            }
+            match = item;
+        }
+        return match;
+    }
+
+    record Card(List<Checklist> checklists) {
+        record Checklist(String name, List<ChecklistItem> items) {}
+        record ChecklistItem(String text) {}
+    }
+
+    static final class TrelloException extends RuntimeException {
+        private final String code;
+
+        TrelloException(String code, String message) {
+            super(message);
+            this.code = code;
+        }
+
+        String code() {
+            return code;
+        }
+    }
+}
+```
+
+Preserve null-safe name matching, no-match `null` behavior, encounter order for the single returned
+match, and the existing exception codes and messages. The lookup only needs to distinguish zero
+matches, exactly one match, and at least two matches, so do not scan or retain matches after
+ambiguity is already proven. If both lookup methods need the same zero, one, or ambiguous branch,
+extract that branch into a small shared helper while keeping the predicates and error contracts
+domain-specific. Keep the code small.
diff --git a/evals-reference/30-prefer-findany-equivalent-matches/capability.txt b/evals-reference/30-prefer-findany-equivalent-matches/capability.txt
new file mode 100644
index 0000000..931c377
--- /dev/null
+++ b/evals-reference/30-prefer-findany-equivalent-matches/capability.txt
@@ -0,0 +1,2 @@
+Audit Optional-returning stream terminals and choose findAny only when encounter order is not part
+of the result contract.
diff --git a/evals-reference/30-prefer-findany-equivalent-matches/criteria.json b/evals-reference/30-prefer-findany-equivalent-matches/criteria.json
new file mode 100644
index 0000000..d6c69ed
--- /dev/null
+++ b/evals-reference/30-prefer-findany-equivalent-matches/criteria.json
@@ -0,0 +1,50 @@
+{
+  "context": "Reference focused cleanup: findAny should express equivalent-match lookups while findFirst remains for ordered contracts.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns revised Java 17-compatible LookupTerminals code with imports, methods, helper, and record intact."
+    },
+    {
+      "name": "Uses findAny for equivalent matches",
+      "category": "stream_quality",
+      "max_score": 30,
+      "description": "Changes exact or normalized configured-name lookups to findAny because all valid matches are equivalent or expected to be unique by contract."
+    },
+    {
+      "name": "Preserves ordered first-match contracts",
+      "category": "stream_quality",
+      "max_score": 24,
+      "description": "Keeps findFirst for PATH-style search order and first output line selection where encounter order selects the result."
+    },
+    {
+      "name": "Explains retained findFirst calls",
+      "category": "maintainability",
+      "max_score": 12,
+      "description": "Adds concise comments or equivalent explanation for each retained findFirst call that identify the order contract rather than relying on current sequential behavior."
+    },
+    {
+      "name": "Avoids mechanical replacement",
+      "category": "safety",
+      "max_score": 14,
+      "description": "Does not replace every findFirst mechanically, does not use findAny for fallback or first-line behavior, and does not claim tests alone prove order irrelevance."
+    },
+    {
+      "name": "Keeps filters and normalization",
+      "category": "safety",
+      "max_score": 12,
+      "description": "Preserves case-insensitive list matching, closed-list filtering, normalization, path resolution, and output-line filtering."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/51",
+    "reference_selection": "Focused issue #51 coverage for findAny versus findFirst semantic audits.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same audit shape."
+  }
+}
diff --git a/evals-reference/30-prefer-findany-equivalent-matches/task.md b/evals-reference/30-prefer-findany-equivalent-matches/task.md
new file mode 100644
index 0000000..13fdd00
--- /dev/null
+++ b/evals-reference/30-prefer-findany-equivalent-matches/task.md
@@ -0,0 +1,49 @@
+# Audit Optional stream terminals
+
+Refactor `LookupTerminals.java` only where the terminal operation's contract is clearer. Assume Java 17.
+
+Return the revised Java code, with one brief comment beside each retained `findFirst()` explaining
+why the first match is semantically required.
+
+```java
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Locale;
+import java.util.Optional;
+
+final class LookupTerminals {
+    static Optional<String> detectedList(List<String> openListNames, String expectedName) {
+        return openListNames.stream()
+                .filter(name -> name.equalsIgnoreCase(expectedName))
+                .findFirst();
+    }
+
+    static Optional<BoardList> targetList(List<BoardList> lists, String configuredName) {
+        String expected = normalize(configuredName);
+        return lists.stream()
+                .filter(list -> !list.closed())
+                .filter(list -> normalize(list.name()).equals(expected))
+                .findFirst();
+    }
+
+    static Optional<Path> firstExistingPath(List<Path> searchPath, String commandName) {
+        return searchPath.stream()
+                .map(path -> path.resolve(commandName))
+                .filter(path -> path.toFile().exists())
+                .findFirst();
+    }
+
+    static Optional<String> firstVersionLine(String output) {
+        return output.lines()
+                .map(String::stripLeading)
+                .filter(line -> line.startsWith("java "))
+                .findFirst();
+    }
+
+    private static String normalize(String value) {
+        return value.toLowerCase(Locale.ROOT).replaceAll("\\s+", " ").strip();
+    }
+
+    record BoardList(String id, String name, boolean closed) {}
+}
+```
diff --git a/evals-reference/31-immutable-result-append-list/capability.txt b/evals-reference/31-immutable-result-append-list/capability.txt
new file mode 100644
index 0000000..d94ef80
--- /dev/null
+++ b/evals-reference/31-immutable-result-append-list/capability.txt
@@ -0,0 +1,2 @@
+Replace simple temporary append buffers with direct stream-owned immutable results while keeping
+mutable builders where they remain clearer.
diff --git a/evals-reference/31-immutable-result-append-list/criteria.json b/evals-reference/31-immutable-result-append-list/criteria.json
new file mode 100644
index 0000000..c725c3f
--- /dev/null
+++ b/evals-reference/31-immutable-result-append-list/criteria.json
@@ -0,0 +1,50 @@
+{
+  "context": "Reference focused cleanup: a temporary mutable append list can become a direct immutable result when mutability is not part of the method contract.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns complete Java 17-compatible ManifestUpdate code with imports, records, methods, and constructor behavior intact."
+    },
+    {
+      "name": "Replaces simple append buffer",
+      "category": "stream_quality",
+      "max_score": 28,
+      "description": "Refactors withBoard to produce filtered existing boards plus the new board directly, for example with Stream.concat and Stream.of, instead of creating a mutable append buffer."
+    },
+    {
+      "name": "Preserves encounter order",
+      "category": "safety",
+      "max_score": 18,
+      "description": "Keeps all retained existing boards in original order and appends the new board after them."
+    },
+    {
+      "name": "Audits result mutability",
+      "category": "stream_quality",
+      "max_score": 14,
+      "description": "Recognizes that the manifest constructor copies its input, so the temporary list's mutability is not part of the public result contract."
+    },
+    {
+      "name": "Keeps complex builder when clearer",
+      "category": "maintainability",
+      "max_score": 16,
+      "description": "Does not force withOptionalSections into a dense stream when the conditional append and optional summary row are clearer as a small builder or loop."
+    },
+    {
+      "name": "Avoids unrelated changes",
+      "category": "safety",
+      "max_score": 16,
+      "description": "Does not change sameBoardOrWorkflow semantics, archive filtering, record fields, constructor copying, or method signatures."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/50",
+    "reference_selection": "Focused issue #50 coverage for immutable result production over temporary append lists.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape."
+  }
+}
diff --git a/evals-reference/31-immutable-result-append-list/task.md b/evals-reference/31-immutable-result-append-list/task.md
new file mode 100644
index 0000000..0413a6f
--- /dev/null
+++ b/evals-reference/31-immutable-result-append-list/task.md
@@ -0,0 +1,51 @@
+# Remove unnecessary temporary mutability
+
+Refactor `ManifestUpdate.java` where doing so improves readability without changing behavior.
+Assume Java 17.
+
+Return the revised Java code only.
+
+```java
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+
+record ConnectedBoardManifest(List<ConnectedBoard> boards) {
+    ConnectedBoardManifest {
+        boards = List.copyOf(boards);
+    }
+
+    ConnectedBoardManifest withBoard(ConnectedBoard board) {
+        List<ConnectedBoard> updated = new ArrayList<>(boards.stream()
+                .filter(existing -> !sameBoardOrWorkflow(existing, board))
+                .toList());
+        updated.add(board);
+        return new ConnectedBoardManifest(updated);
+    }
+
+    ConnectedBoardManifest withOptionalSections(List<ConnectedBoard> selected, boolean includeArchived) {
+        List<ConnectedBoard> updated = new ArrayList<>();
+        for (ConnectedBoard board : selected) {
+            if (!board.archived() || includeArchived) {
+                updated.add(board);
+            }
+        }
+        if (includeArchived) {
+            updated.add(new ConnectedBoard("archive-summary", null, true));
+        }
+        return new ConnectedBoardManifest(updated);
+    }
+
+    private static boolean sameBoardOrWorkflow(ConnectedBoard left, ConnectedBoard right) {
+        return left.boardId().equals(right.boardId())
+                || left.workflowPath() != null && left.workflowPath().equals(right.workflowPath());
+    }
+}
+
+record ConnectedBoard(String boardId, String workflowPath, boolean archived) {}
+```
+
+The manifest constructor copies its input. Preserve encounter order, filtering, duplicate handling,
+and public API shape. Only refactor the simple temporary append-buffer case when the stream result
+stays readable; leave the conditional builder method imperative if the current loop is clearer than a
+dense stream expression.
diff --git a/evals-reference/32-predicate-loop-any-match/capability.txt b/evals-reference/32-predicate-loop-any-match/capability.txt
new file mode 100644
index 0000000..23c7c02
--- /dev/null
+++ b/evals-reference/32-predicate-loop-any-match/capability.txt
@@ -0,0 +1,2 @@
+Replace pure predicate loops with anyMatch while keeping side effects, diagnostics, and indexes out
+of stream pipelines.
diff --git a/evals-reference/32-predicate-loop-any-match/criteria.json b/evals-reference/32-predicate-loop-any-match/criteria.json
new file mode 100644
index 0000000..b55c2b7
--- /dev/null
+++ b/evals-reference/32-predicate-loop-any-match/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused cleanup: pure predicate loops can become anyMatch while side-effecting or index-sensitive loops should remain imperative.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns complete Java 17-compatible ManifestChecks code with necessary imports and all methods present."
+    },
+    {
+      "name": "Uses anyMatch for boolean predicate",
+      "category": "stream_quality",
+      "max_score": 24,
+      "description": "Refactors hasNonObjectBoardRow to use StreamSupport.stream(..., false).anyMatch(...) or an equivalent short-circuiting stream over the JsonNode iterable."
+    },
+    {
+      "name": "Keeps terminal warning side effect outside stream",
+      "category": "stream_quality",
+      "max_score": 18,
+      "description": "Computes whether an invalid root exists with anyMatch, then adds exactly one warning outside the stream instead of mutating warnings inside the stream pipeline."
+    },
+    {
+      "name": "Preserves JsonNode behavior",
+      "category": "safety",
+      "max_score": 16,
+      "description": "Keeps a missing (null) additionalWritableRoots field as a no-op, a non-array value as the array warning, and non-textual or blank elements as the non-blank-strings warning."
+    },
+    {
+      "name": "Rejects collector and forEach workarounds",
+      "category": "stream_quality",
+      "max_score": 14,
+      "description": "Does not collect invalid roots just to check emptiness and does not use forEach with an external boolean holder or warning mutation."
+    },
+    {
+      "name": "Preserves index-sensitive loop",
+      "category": "maintainability",
+      "max_score": 12,
+      "description": "Keeps writeRows as a clear loop or otherwise preserves the index-specific output without forcing a noisy stream rewrite."
+    },
+    {
+      "name": "Explains terminal choice through code",
+      "category": "maintainability",
+      "max_score": 8,
+      "description": "Uses anyMatch for existence questions rather than findAny or findFirst when the required result is a boolean."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/49",
+    "reference_selection": "Focused issue #49 coverage for predicate-only loops as anyMatch.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape."
+  }
+}
diff --git a/evals-reference/32-predicate-loop-any-match/task.md b/evals-reference/32-predicate-loop-any-match/task.md
new file mode 100644
index 0000000..adc50db
--- /dev/null
+++ b/evals-reference/32-predicate-loop-any-match/task.md
@@ -0,0 +1,49 @@
+# Clean up predicate-only loops
+
+Refactor `ManifestChecks.java` where a stream terminal makes the intent clearer. Assume Java 17.
+
+Return the revised Java code only.
+
+```java
+import com.fasterxml.jackson.databind.JsonNode;
+import java.util.List;
+
+final class ManifestChecks {
+    static boolean hasNonObjectBoardRow(JsonNode root) {
+        JsonNode boards = root.path("boards");
+        for (JsonNode board : boards) {
+            if (!board.isObject()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    static void requireWritableRoots(JsonNode board, String label, List<String> warnings) {
+        JsonNode roots = board.get("additionalWritableRoots");
+        if (roots == null) {
+            return;
+        }
+        if (!roots.isArray()) {
+            warnings.add("Entry " + label + " field additionalWritableRoots must be an array.");
+            return;
+        }
+        for (JsonNode root : roots) {
+            if (!root.isTextual() || root.asText().isBlank()) {
+                warnings.add("Entry " + label + " field additionalWritableRoots must contain non-blank strings.");
+                return;
+            }
+        }
+    }
+
+    static void writeRows(List<String> rows, JsonNode boards) {
+        int index = 0;
+        for (JsonNode board : boards) {
+            rows.add(index + ":" + board.path("name").asText());
+            index++;
+        }
+    }
+}
+```
+
+Preserve warning text, short-circuit behavior, and index-sensitive row output.
diff --git a/evals-reference/33-final-collection-boundary/capability.txt b/evals-reference/33-final-collection-boundary/capability.txt
new file mode 100644
index 0000000..e5c505b
--- /dev/null
+++ b/evals-reference/33-final-collection-boundary/capability.txt
@@ -0,0 +1,2 @@
+Choose direct final collection results when streams own the output, and keep explicit accumulator
+boundaries when mutation is the real operation.
diff --git a/evals-reference/33-final-collection-boundary/criteria.json b/evals-reference/33-final-collection-boundary/criteria.json
new file mode 100644
index 0000000..e36af64
--- /dev/null
+++ b/evals-reference/33-final-collection-boundary/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused cleanup: choose between stream-owned final collections and honest mutable accumulator boundaries.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns complete Java 17-compatible ReservationDiscovery code with imports, methods, record, and interface intact."
+    },
+    {
+      "name": "Collects final set directly",
+      "category": "stream_quality",
+      "max_score": 26,
+      "description": "Refactors localWorkflowFilePortReservations so the Files.list stream directly returns the final Set result instead of collecting to a temporary list and copying it with addAll."
+    },
+    {
+      "name": "Uses explicit immutable or set collector intent",
+      "category": "stream_quality",
+      "max_score": 14,
+      "description": "Chooses a set-producing terminal or collector whose mutability/order semantics are audited, rather than using toList only to copy into a HashSet."
+    },
+    {
+      "name": "Preserves IO behavior",
+      "category": "safety",
+      "max_score": 16,
+      "description": "Still returns an empty set for null config dirs, non-directories, or IOException while listing files, and keeps the same file filters and Optional flattening."
+    },
+    {
+      "name": "Keeps accumulator boundary honest",
+      "category": "stream_quality",
+      "max_score": 18,
+      "description": "Does not keep or introduce toList plus addAll in extendCleanupList when the temporary list only feeds the existing mutable cleanup accumulator; uses a direct append boundary or a clear loop."
+    },
+    {
+      "name": "Avoids forEach misuse",
+      "category": "maintainability",
+      "max_score": 10,
+      "description": "Does not replace a result-producing helper with stream().forEach(set::add) when returning a final collection would be clearer."
+    },
+    {
+      "name": "No unrelated behavior changes",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Keeps best-effort cleanup exception handling, duplicate collapsing for set results, and existing method signatures."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/48",
+    "reference_selection": "Focused issue #48 coverage for final collection boundaries versus addAll copies.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape."
+  }
+}
diff --git a/evals-reference/33-final-collection-boundary/task.md b/evals-reference/33-final-collection-boundary/task.md
new file mode 100644
index 0000000..d57c679
--- /dev/null
+++ b/evals-reference/33-final-collection-boundary/task.md
@@ -0,0 +1,63 @@
+# Clean up final collection boundaries
+
+Refactor `ReservationDiscovery.java` where a stream can own the final result. Assume Java 17.
+
+Return the revised Java code only.
+
+```java
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+
+final class ReservationDiscovery {
+    Set<Integer> localWorkflowFilePortReservations(Path configDir, ConnectedBoard ignoredBoard) {
+        Set<Integer> reserved = new HashSet<>();
+        if (configDir == null || !Files.isDirectory(configDir)) {
+            return reserved;
+        }
+        try (var files = Files.list(configDir)) {
+            List<Integer> workflowPorts = files.filter(Files::isRegularFile)
+                    .filter(file -> file.getFileName().toString().endsWith(".md"))
+                    .filter(file -> ignoredBoard.workflowPath() == null
+                            || !file.equals(ignoredBoard.workflowPath()))
+                    .map(this::serverPort)
+                    .flatMap(Optional::stream)
+                    .toList();
+            reserved.addAll(workflowPorts);
+        } catch (IOException ignored) {
+            // Leave only manifest and probe checks.
+        }
+        return reserved;
+    }
+
+    void extendCleanupList(List<String> boardIds, Trello trello, String workspaceId, String runId) {
+        try {
+            List<String> openDisposableBoardIds = trello.openBoardIdsByNamePrefix(workspaceId, runId).stream()
+                    .filter(boardId -> !boardIds.contains(boardId))
+                    .toList();
+            boardIds.addAll(openDisposableBoardIds);
+        } catch (RuntimeException ignored) {
+            // Cleanup is best effort.
+        }
+    }
+
+    private Optional<Integer> serverPort(Path workflow) {
+        return Optional.empty();
+    }
+
+    record ConnectedBoard(Path workflowPath) {}
+
+    interface Trello {
+        List<String> openBoardIdsByNamePrefix(String workspaceId, String runId);
+    }
+}
+```
+
+Preserve empty and unreadable-directory behavior. The cleanup list method intentionally extends an
+existing mutable accumulator; avoid creating a temporary stream result whose only purpose is to be
+copied into that accumulator.
diff --git a/evals-reference/34-preserve-regex-line-splitting/capability.txt b/evals-reference/34-preserve-regex-line-splitting/capability.txt
new file mode 100644
index 0000000..6497f1d
--- /dev/null
+++ b/evals-reference/34-preserve-regex-line-splitting/capability.txt
@@ -0,0 +1,2 @@
+Refactor stream line parsing while preserving regex line-break semantics and avoiding external
+mutable accumulation.
diff --git a/evals-reference/34-preserve-regex-line-splitting/criteria.json b/evals-reference/34-preserve-regex-line-splitting/criteria.json
new file mode 100644
index 0000000..65d5b8a
--- /dev/null
+++ b/evals-reference/34-preserve-regex-line-splitting/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused cleanup: stream refactors must preserve regex-based line splitting semantics.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns complete Java 17-compatible SourceDeclarations code with imports, records, methods, and Pattern usage intact."
+    },
+    {
+      "name": "Preserves regex line-break contract",
+      "category": "safety",
+      "max_score": 24,
+      "description": "Does not replace split(\"\\\\R\", -1) with String.lines() unless it explicitly preserves the original regex line-break semantics; uses Pattern.splitAsStream or an equivalent regex-preserving source."
+    },
+    {
+      "name": "Produces declarations through stream result",
+      "category": "stream_quality",
+      "max_score": 22,
+      "description": "Refactors the top-level extraction so streams produce the returned declarations directly rather than using forEach to mutate an external list."
+    },
+    {
+      "name": "Preserves inputs searched",
+      "category": "safety",
+      "max_score": 14,
+      "description": "Still searches title, description, and every comment text, and still ignores null or blank text blocks."
+    },
+    {
+      "name": "Preserves matching rule",
+      "category": "safety",
+      "max_score": 12,
+      "description": "Keeps LABELED_SOURCE.matcher(line).matches() semantics and Declaration construction from the captured source value."
+    },
+    {
+      "name": "Avoids side-effecting stream terminal",
+      "category": "stream_quality",
+      "max_score": 12,
+      "description": "Does not keep the comments forEach, introduce text.lines().forEach(...), or use any other stream terminal that mutates an external list."
+    },
+    {
+      "name": "Keeps refactor focused",
+      "category": "maintainability",
+      "max_score": 8,
+      "description": "Does not add broad parser features, change source labels, change public records, or introduce unrelated abstractions."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/47",
+    "reference_selection": "Focused issue #47 coverage for regex line-splitting preservation during stream refactors.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape."
+  }
+}
diff --git a/evals-reference/34-preserve-regex-line-splitting/task.md b/evals-reference/34-preserve-regex-line-splitting/task.md
new file mode 100644
index 0000000..fda57fa
--- /dev/null
+++ b/evals-reference/34-preserve-regex-line-splitting/task.md
@@ -0,0 +1,46 @@
+# Preserve parser line splitting
+
+Refactor `SourceDeclarations.java` to make the declaration extraction more stream-oriented without
+changing the parser contract. Assume Java 17.
+
+Return the revised Java code only.
+
+```java
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+final class SourceDeclarations {
+    private static final Pattern LABELED_SOURCE = Pattern.compile("(?i)^source[\\t ]*:[\\t ]*(.*)$");
+
+    static List<Declaration> declarations(Card card) {
+        List<Declaration> declarations = new ArrayList<>();
+        addDeclarations(declarations, card.title());
+        addDeclarations(declarations, card.description());
+        card.comments().stream()
+                .map(Comment::text)
+                .forEach(text -> addDeclarations(declarations, text));
+        return List.copyOf(declarations);
+    }
+
+    private static void addDeclarations(List<Declaration> declarations, String text) {
+        if (text == null || text.isBlank()) {
+            return;
+        }
+        for (String line : text.split("\\R", -1)) {
+            Matcher labeled = LABELED_SOURCE.matcher(line);
+            if (labeled.matches()) {
+                declarations.add(new Declaration(labeled.group(1)));
+            }
+        }
+    }
+
+    record Card(String title, String description, List<Comment> comments) {}
+    record Comment(String text) {}
+    record Declaration(String value) {}
+}
+```
+
+The existing parser recognizes Java regex `\R` line breaks. Preserve null and blank text behavior,
+title/description/comment coverage, and unmodifiable returned-list behavior.
diff --git a/evals-reference/35-to-collection-rationale/capability.txt b/evals-reference/35-to-collection-rationale/capability.txt
new file mode 100644
index 0000000..645abab
--- /dev/null
+++ b/evals-reference/35-to-collection-rationale/capability.txt
@@ -0,0 +1,2 @@
+Audit Collectors.toCollection calls and keep concrete collection types only when mutability,
+membership, or order semantics require them.
diff --git a/evals-reference/35-to-collection-rationale/criteria.json b/evals-reference/35-to-collection-rationale/criteria.json
new file mode 100644
index 0000000..31972d8
--- /dev/null
+++ b/evals-reference/35-to-collection-rationale/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused cleanup: concrete toCollection collectors should be justified or simplified based on collection semantics.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns complete Java 17-compatible CollectorChoices code with imports, records, and methods intact."
+    },
+    {
+      "name": "Keeps mutable collector with rationale",
+      "category": "stream_quality",
+      "max_score": 18,
+      "description": "Keeps the ArrayList collection for terminalCards only because the result is appended to before the immutable copy, and makes that mutability reason clear with a concise comment or equivalent code structure. Award full credit when the rationale is visible near the collector or later mutation; do not penalize harmless comment placement."
+    },
+    {
+      "name": "Simplifies unnecessary HashSet collector",
+      "category": "stream_quality",
+      "max_score": 20,
+      "description": "Simplifies archivedListIds when the concrete HashSet implementation is not required, while avoiding ambiguous or policy-violating collector choices if the surrounding codebase requires explicit mutability."
+    },
+    {
+      "name": "Preserves order-preserving de-duplication",
+      "category": "stream_quality",
+      "max_score": 18,
+      "description": "Keeps LinkedHashSet or an equivalent encounter-order-preserving set for unconnectedWorkflowPaths, because diagnostics should de-duplicate while preserving encounter order."
+    },
+    {
+      "name": "Documents concrete type reasons",
+      "category": "maintainability",
+      "max_score": 16,
+      "description": "Adds concise comments or code structure that explains mutability, membership, or encounter-order reasons instead of restating the collector syntax. Award full credit when the reason is clear even if the comment is placed just after the collector expression."
+    },
+    {
+      "name": "Avoids mechanical replacement",
+      "category": "safety",
+      "max_score": 12,
+      "description": "Does not replace every toCollection mechanically with toSet, toList, or stream().forEach(...) without checking mutability and order behavior."
+    },
+    {
+      "name": "Keeps behavior unchanged",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Preserves archive-summary append behavior, closed-list filtering, selected-path filtering, and method signatures."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/46",
+    "reference_selection": "Focused issue #46 coverage for explaining or simplifying Collectors.toCollection choices.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape."
+  }
+}
diff --git a/evals-reference/35-to-collection-rationale/task.md b/evals-reference/35-to-collection-rationale/task.md
new file mode 100644
index 0000000..9e6ba84
--- /dev/null
+++ b/evals-reference/35-to-collection-rationale/task.md
@@ -0,0 +1,42 @@
+# Clarify concrete collector choices
+
+Review and clean up `CollectorChoices.java`. Assume Java 17.
+
+Return the revised Java code only. Keep concrete collection collectors only where the concrete type
+is part of the method's behavior. When a later operation requires mutability or encounter-order
+preserving de-duplication, keep that concrete collector and make the reason visible in the code.
+
+```java
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+final class CollectorChoices {
+    static List<Card> terminalCards(List<Card> boardCards) {
+        List<Card> normalized = boardCards.stream()
+                .filter(Card::terminal)
+                .collect(Collectors.toCollection(ArrayList::new));
+        normalized.add(new Card("archive-summary", true));
+        return List.copyOf(normalized);
+    }
+
+    static Set<String> archivedListIds(List<BoardList> lists) {
+        return lists.stream()
+                .filter(BoardList::closed)
+                .map(BoardList::id)
+                .collect(Collectors.toCollection(HashSet::new));
+    }
+
+    static LinkedHashSet<String> unconnectedWorkflowPaths(List<String> reported, Set<String> selected) {
+        return reported.stream()
+                .filter(path -> !selected.contains(path))
+                .collect(Collectors.toCollection(LinkedHashSet::new));
+    }
+
+    record Card(String id, boolean terminal) {}
+    record BoardList(String id, boolean closed) {}
+}
+```
diff --git a/evals-reference/36-wrap-multi-operation-stream-chains/capability.txt b/evals-reference/36-wrap-multi-operation-stream-chains/capability.txt
new file mode 100644
index 0000000..f516252
--- /dev/null
+++ b/evals-reference/36-wrap-multi-operation-stream-chains/capability.txt
@@ -0,0 +1,2 @@
+Format multi-operation Java stream chains so each operation after stream() is scannable without
+changing stream semantics.
diff --git a/evals-reference/36-wrap-multi-operation-stream-chains/criteria.json b/evals-reference/36-wrap-multi-operation-stream-chains/criteria.json
new file mode 100644
index 0000000..f559764
--- /dev/null
+++ b/evals-reference/36-wrap-multi-operation-stream-chains/criteria.json
@@ -0,0 +1,50 @@
+{
+  "context": "Reference focused cleanup: multi-operation stream chains should be wrapped for readability without semantic changes.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns complete Java 17-compatible StreamFormattingSample code with imports, methods, and records intact."
+    },
+    {
+      "name": "Wraps multi-operation chains",
+      "category": "maintainability",
+      "max_score": 34,
+      "description": "Moves each operation after stream() to its own continuation line for chains with more than one operation after the source stream call."
+    },
+    {
+      "name": "Keeps stream on source line",
+      "category": "maintainability",
+      "max_score": 18,
+      "description": "Keeps .stream() attached to the source expression rather than moving it to a separate line."
+    },
+    {
+      "name": "Handles single-operation chain proportionately",
+      "category": "maintainability",
+      "max_score": 10,
+      "description": "Leaves the one-operation labels.stream().findFirst() chain on one line or wraps it coherently; does not apply unrelated API churn or move .stream() onto a standalone line just to satisfy the multi-operation wrapping rule."
+    },
+    {
+      "name": "Preserves stream behavior",
+      "category": "safety",
+      "max_score": 18,
+      "description": "Does not change terminal operations, collectors, mapping behavior, encounter order, fallback value, or map keys while formatting."
+    },
+    {
+      "name": "Avoids style-only API churn",
+      "category": "safety",
+      "max_score": 12,
+      "description": "Does not introduce different collectors, helper methods, parallel streams, or unrelated imports while making a formatting-only change."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/45",
+    "reference_selection": "Focused issue #45 coverage for readable multi-operation stream-chain wrapping.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same style rule."
+  }
+}
diff --git a/evals-reference/36-wrap-multi-operation-stream-chains/task.md b/evals-reference/36-wrap-multi-operation-stream-chains/task.md
new file mode 100644
index 0000000..8ff7802
--- /dev/null
+++ b/evals-reference/36-wrap-multi-operation-stream-chains/task.md
@@ -0,0 +1,44 @@
+# Format stream chains
+
+Format `StreamFormattingSample.java` for readability without changing behavior. Assume Java 17.
+
+Return the revised Java code only.
+
+```java
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+final class StreamFormattingSample {
+    Set<String> normalizedLabels(Card card) {
+        return card.labels().stream().map(StreamFormattingSample::normalize).collect(Collectors.toSet());
+    }
+
+    String firstLabel(Card card) {
+        return card.labels().stream().findFirst().orElse("none");
+    }
+
+    void addCardFields(Card card, java.util.Map<String, Object> values) {
+        values.put("checklists", card.checklists().stream().map(Checklist::asMap).toList());
+        values.put("attachments", card.attachments().stream().map(Attachment::asMap).toList());
+    }
+
+    private static String normalize(String value) {
+        return value.toLowerCase(java.util.Locale.ROOT).strip();
+    }
+
+    record Card(List<String> labels, List<Checklist> checklists, List<Attachment> attachments) {}
+    record Checklist(String name) {
+        java.util.Map<String, Object> asMap() {
+            return java.util.Map.of("name", name);
+        }
+    }
+    record Attachment(String name) {
+        java.util.Map<String, Object> asMap() {
+            return java.util.Map.of("name", name);
+        }
+    }
+}
+```
+
+Keep `.stream()` on the source line. One-operation chains may stay on one line when readable.
diff --git a/evals-reference/37-to-map-function-identity-mapper/capability.txt b/evals-reference/37-to-map-function-identity-mapper/capability.txt
new file mode 100644
index 0000000..a701193
--- /dev/null
+++ b/evals-reference/37-to-map-function-identity-mapper/capability.txt
@@ -0,0 +1,2 @@
+Use Function.identity for true identity mappers in toMap while preserving merge functions, map
+suppliers, and non-identity mappings.
diff --git a/evals-reference/37-to-map-function-identity-mapper/criteria.json b/evals-reference/37-to-map-function-identity-mapper/criteria.json
new file mode 100644
index 0000000..0316c8e
--- /dev/null
+++ b/evals-reference/37-to-map-function-identity-mapper/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused cleanup: identity key and value mappers in Collectors.toMap should use Function.identity without changing collector semantics.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns complete Java 17-compatible StateIndexes code and adds any required java.util.function.Function import."
+    },
+    {
+      "name": "Uses Function.identity for identity key mapper",
+      "category": "stream_quality",
+      "max_score": 20,
+      "description": "Replaces state -> state with Function.identity() in the normalized state counting collector."
+    },
+    {
+      "name": "Uses Function.identity for identity value mapper",
+      "category": "stream_quality",
+      "max_score": 20,
+      "description": "Replaces card -> card with Function.identity() in the Card::id indexing collector."
+    },
+    {
+      "name": "Preserves merge functions",
+      "category": "safety",
+      "max_score": 16,
+      "description": "Keeps Integer::sum for duplicate state counts and keeps the left-wins merge function for duplicate card ids."
+    },
+    {
+      "name": "Preserves map suppliers",
+      "category": "safety",
+      "max_score": 14,
+      "description": "Keeps HashMap::new and LinkedHashMap::new where they were part of the original collector behavior."
+    },
+    {
+      "name": "Does not over-apply identity",
+      "category": "stream_quality",
+      "max_score": 14,
+      "description": "Does not replace the non-identity card -> card.displayName() mapper with Function.identity() or a custom identity helper."
+    },
+    {
+      "name": "Keeps downstream result behavior",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Preserves values().stream().toList() behavior, method signatures, record fields, and existing imports without unrelated collector changes."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/44",
+    "reference_selection": "Focused issue #44 coverage for Function.identity in toMap identity mappers.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same collector shape."
+  }
+}
diff --git a/evals-reference/37-to-map-function-identity-mapper/task.md b/evals-reference/37-to-map-function-identity-mapper/task.md
new file mode 100644
index 0000000..c351c16
--- /dev/null
+++ b/evals-reference/37-to-map-function-identity-mapper/task.md
@@ -0,0 +1,38 @@
+# Clean up toMap identity mappers
+
+Refactor `StateIndexes.java`. Assume Java 17.
+
+Return the revised Java code only.
+
+```java
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+final class StateIndexes {
+    Map<String, Integer> runningCountsByState(List<String> normalizedStates) {
+        return normalizedStates.stream()
+                .collect(Collectors.toMap(state -> state, state -> 1, Integer::sum, HashMap::new));
+    }
+
+    List<Card> dedupeById(List<Card> normalized) {
+        return normalized.stream()
+                .collect(Collectors.toMap(Card::id, card -> card, (left, right) -> left, LinkedHashMap::new))
+                .values()
+                .stream()
+                .toList();
+    }
+
+    Map<String, String> displayNameById(List<Card> cards) {
+        return cards.stream()
+                .collect(Collectors.toMap(Card::id, card -> card.displayName(), (left, right) -> left));
+    }
+
+    record Card(String id, String displayName) {}
+}
+```
+
+Preserve duplicate-key merge behavior, explicit map suppliers, and the non-identity display-name
+mapper.
diff --git a/evals-reference/38-collector-owned-result-foreach-mutation/capability.txt b/evals-reference/38-collector-owned-result-foreach-mutation/capability.txt
new file mode 100644
index 0000000..891492c
--- /dev/null
+++ b/evals-reference/38-collector-owned-result-foreach-mutation/capability.txt
@@ -0,0 +1,2 @@
+Replace external stream mutation with result-producing stream terminals and collectors when the
+method is building a returned value.
diff --git a/evals-reference/38-collector-owned-result-foreach-mutation/criteria.json b/evals-reference/38-collector-owned-result-foreach-mutation/criteria.json
new file mode 100644
index 0000000..7ed38d6
--- /dev/null
+++ b/evals-reference/38-collector-owned-result-foreach-mutation/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused cleanup: stream pipelines that build returned values should use result-producing terminals instead of external mutation.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns complete Java 17-compatible WritableRoots code with necessary imports and methods intact."
+    },
+    {
+      "name": "Uses Stream.concat for ordered sources",
+      "category": "stream_quality",
+      "max_score": 20,
+      "description": "Builds configured and environment root streams and combines them before distinct().toList(), rather than mutating an external ArrayList from forEach."
+    },
+    {
+      "name": "Preserves ordered duplicate handling",
+      "category": "safety",
+      "max_score": 16,
+      "description": "Keeps configured roots before environment roots and preserves first-occurrence distinct behavior."
+    },
+    {
+      "name": "Uses collector-owned map accumulation",
+      "category": "stream_quality",
+      "max_score": 22,
+      "description": "Replaces counts.merge inside stream().forEach with Collectors.toMap or an equivalent collector-owned counting result."
+    },
+    {
+      "name": "Preserves duplicate count merge",
+      "category": "safety",
+      "max_score": 14,
+      "description": "Keeps duplicate normalized states counted with Integer::sum or equivalent addition, and still returns a mutable HashMap, for example by passing HashMap::new as the toMap map supplier."
+    },
+    {
+      "name": "Rejects cosmetic mutation",
+      "category": "stream_quality",
+      "max_score": 12,
+      "description": "Does not replace forEach mutation with intermediate lists or maps that are immediately copied into another mutable result without improving the boundary."
+    },
+    {
+      "name": "Keeps helpers focused",
+      "category": "maintainability",
+      "max_score": 8,
+      "description": "Avoids parallel streams, global caches, or broad helper abstractions unrelated to result-producing accumulation."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/42",
+    "reference_selection": "Focused issue #42 coverage for collector-owned results instead of stream forEach mutation.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same side-effect boundary."
+  }
+}
diff --git a/evals-reference/38-collector-owned-result-foreach-mutation/task.md b/evals-reference/38-collector-owned-result-foreach-mutation/task.md
new file mode 100644
index 0000000..3b650b8
--- /dev/null
+++ b/evals-reference/38-collector-owned-result-foreach-mutation/task.md
@@ -0,0 +1,51 @@
+# Replace misleading stream mutation
+
+Refactor `WritableRoots.java` where stream pipelines should own returned results. Assume Java 17.
+
+Return the revised Java code only.
+
+```java
+import java.io.File;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Stream;
+
+final class WritableRoots {
+    List<Path> additionalWritableRoots(Path workflowDirectory, Map<String, Object> codex) {
+        List<Path> roots = new ArrayList<>();
+        list(codex, "additional_writable_roots", List.of()).stream()
+                .map(value -> workflowDirectory.resolve(value))
+                .forEach(roots::add);
+        environmentValue("ADDITIONAL_WRITABLE_ROOTS").stream()
+                .flatMap(value -> Arrays.stream(value.split(java.util.regex.Pattern.quote(File.pathSeparator))))
+                .map(String::trim)
+                .filter(value -> !value.isBlank())
+                .map(workflowDirectory::resolve)
+                .forEach(roots::add);
+        return roots.stream().distinct().toList();
+    }
+
+    Map<String, Integer> runningCountsByState(List<String> normalizedStates) {
+        Map<String, Integer> counts = new HashMap<>();
+        normalizedStates.stream().forEach(state -> counts.merge(state, 1, Integer::sum));
+        return counts;
+    }
+
+    private static List<String> list(Map<String, Object> map, String key, List<String> defaultValue) {
+        Object value = map.get(key);
+        return value instanceof List<?> values ? values.stream().map(Object::toString).toList() : defaultValue;
+    }
+
+    private static Optional<String> environmentValue(String name) {
+        return Optional.empty();
+    }
+}
+```
+
+Preserve configured-root precedence over environment roots, first-occurrence duplicate removal, and
+mutable `HashMap` result behavior.
diff --git a/evals-reference/39-batch-reference-lookup-before-rendering/capability.txt b/evals-reference/39-batch-reference-lookup-before-rendering/capability.txt
new file mode 100644
index 0000000..0e55d4f
--- /dev/null
+++ b/evals-reference/39-batch-reference-lookup-before-rendering/capability.txt
@@ -0,0 +1,2 @@
+Separate pure stream extraction, one batched lookup boundary, rendering, and explicit write phases
+when refactoring stream-heavy Java code.
diff --git a/evals-reference/39-batch-reference-lookup-before-rendering/criteria.json b/evals-reference/39-batch-reference-lookup-before-rendering/criteria.json
new file mode 100644
index 0000000..ae249b2
--- /dev/null
+++ b/evals-reference/39-batch-reference-lookup-before-rendering/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused cleanup: gather lookup IDs across a batch before rendering so helpers do not hide repeated network I/O.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 snippets",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns coherent Java 17-compatible revised snippets that preserve the represented class, records, helper methods, and public method shape."
+    },
+    {
+      "name": "Analyzes cards before lookup",
+      "category": "stream_quality",
+      "max_score": 18,
+      "description": "Creates an analysis or equivalent phase so each card's plan and prompt reference texts are known before card-state lookup happens."
+    },
+    {
+      "name": "Batches lookup IDs once",
+      "category": "stream_quality",
+      "max_score": 24,
+      "description": "Collects all prerequisite and prompt-reference lookup IDs across the full card batch, applies distinct at the batch boundary, and calls fetchCardStatesByIds once."
+    },
+    {
+      "name": "Keeps rendering pure",
+      "category": "maintainability",
+      "max_score": 14,
+      "description": "Makes prompt rendering use already available lookup results and avoids helper methods named like renderers that perform hidden network I/O."
+    },
+    {
+      "name": "Keeps writes explicit",
+      "category": "stream_quality",
+      "max_score": 12,
+      "description": "Keeps checklist synchronization or other externally visible writes outside stream pipelines and visible in an explicit per-card step."
+    },
+    {
+      "name": "Preserves reference behavior",
+      "category": "safety",
+      "max_score": 16,
+      "description": "Does not drop prompt references, plan references, card order, rendered-reference creation, or checklist sync behavior while separating phases."
+    },
+    {
+      "name": "Avoids stream-heavy overreach",
+      "category": "maintainability",
+      "max_score": 8,
+      "description": "Does not hide checklist synchronization, card-state lookups, or other network side effects in map, peek, collectors, or broad custom frameworks."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/41",
+    "reference_selection": "Focused issue #41 coverage for batch lookup before rendering.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same phase-separation shape."
+  }
+}
diff --git a/evals-reference/39-batch-reference-lookup-before-rendering/task.md b/evals-reference/39-batch-reference-lookup-before-rendering/task.md
new file mode 100644
index 0000000..1e22cb5
--- /dev/null
+++ b/evals-reference/39-batch-reference-lookup-before-rendering/task.md
@@ -0,0 +1,63 @@
+# Separate batch lookup from rendering
+
+Refactor `ReferenceRenderer.java` to avoid hidden repeated lookups while keeping writes explicit.
+Assume Java 17.
+
+Return the revised Java snippets only.
+
+```java
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Stream;
+
+final class ReferenceRenderer {
+    List<Card> enrich(List<Card> cards, boolean includeReferenceContext) {
+        Map<String, Plan> plans = new LinkedHashMap<>();
+        for (Card card : cards) {
+            plans.put(card.id(), plan(card));
+        }
+        List<Card> enriched = new ArrayList<>();
+        for (Card card : cards) {
+            Plan plan = plans.get(card.id());
+            List<RenderedReference> references =
+                    includeReferenceContext ? promptReferences(card, plan, Map.of()) : List.of();
+            syncChecklist(card, plan);
+            enriched.add(card.withReferences(references));
+        }
+        return enriched;
+    }
+
+    private List<RenderedReference> promptReferences(Card card, Plan plan, Map<String, LookupResult> known) {
+        Map<String, ReferenceText> references = referenceTexts(card, plan);
+        List<String> missing = references.values().stream()
+                .map(ReferenceText::lookupId)
+                .filter(id -> !known.containsKey(id))
+                .distinct()
+                .toList();
+        Map<String, LookupResult> lookupResults = new LinkedHashMap<>(known);
+        lookupResults.putAll(fetchCardStatesByIds(missing));
+        return references.values().stream()
+                .map(reference -> render(reference, lookupResults.get(reference.lookupId())))
+                .toList();
+    }
+
+    private Plan plan(Card card) { return new Plan(card.references()); }
+    private Map<String, ReferenceText> referenceTexts(Card card, Plan plan) { return Map.of(); }
+    private Map<String, LookupResult> fetchCardStatesByIds(List<String> ids) { return Map.of(); }
+    private RenderedReference render(ReferenceText reference, LookupResult result) { return new RenderedReference(); }
+    private void syncChecklist(Card card, Plan plan) {}
+
+    record Card(String id, List<ReferenceText> references) {
+        Card withReferences(List<RenderedReference> references) { return this; }
+    }
+    record Plan(List<ReferenceText> references) {}
+    record ReferenceText(String key, String lookupId) {}
+    record LookupResult(String state) {}
+    record RenderedReference() {}
+}
+```
+
+The lookup method is a network boundary. Keep checklist synchronization explicit and outside stream
+pipelines.
diff --git a/evals-reference/40-mapmulti-declaration-extraction/capability.txt b/evals-reference/40-mapmulti-declaration-extraction/capability.txt
new file mode 100644
index 0000000..44428a7
--- /dev/null
+++ b/evals-reference/40-mapmulti-declaration-extraction/capability.txt
@@ -0,0 +1,2 @@
+Refactor Java 25 declaration extraction so streams produce the result directly and use mapMulti for
+zero-or-one line emissions.
diff --git a/evals-reference/40-mapmulti-declaration-extraction/criteria.json b/evals-reference/40-mapmulti-declaration-extraction/criteria.json
new file mode 100644
index 0000000..ee0c9a9
--- /dev/null
+++ b/evals-reference/40-mapmulti-declaration-extraction/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused cleanup: Java 25 line-to-zero-or-one stream transformations should prefer mapMulti over side-effecting forEach or line-level flatMap when appropriate.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 25 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns complete Java 25-compatible DeclarationExtractor code with necessary imports, records, methods, and Pattern usage intact."
+    },
+    {
+      "name": "Produces the result through the pipeline",
+      "category": "stream_quality",
+      "max_score": 18,
+      "description": "Refactors declarations(Card) so the stream pipeline returns the final declarations via toList rather than mutating an external List from forEach."
+    },
+    {
+      "name": "Uses mapMulti for zero-or-one emission",
+      "category": "stream_quality",
+      "max_score": 24,
+      "description": "Uses mapMulti or an equivalent Consumer-based emitter for the line-to-zero-or-one Declaration transformation, avoiding tiny Stream.of/Stream.empty allocations as the preferred final shape."
+    },
+    {
+      "name": "Keeps text-block flatMap acceptable",
+      "category": "stream_quality",
+      "max_score": 10,
+      "description": "Uses flatMap only where each text block naturally expands to a stream of declarations, and does not confuse that with the line-level zero-or-one transformation."
+    },
+    {
+      "name": "Preserves filters and matching",
+      "category": "safety",
+      "max_score": 18,
+      "description": "Filters null and blank text blocks, keeps LABELED_SOURCE.matcher(line).matches(), preserves the captured value, and searches title, description, and comments."
+    },
+    {
+      "name": "Avoids side-effecting stream terminals",
+      "category": "stream_quality",
+      "max_score": 12,
+      "description": "Does not keep text.lines().forEach(...) or helper methods whose purpose is mutating a caller-owned declarations list."
+    },
+    {
+      "name": "Keeps implementation readable",
+      "category": "maintainability",
+      "max_score": 10,
+      "description": "Extracts a small emitter helper when useful and avoids custom collectors, large abstractions, or unrelated parser changes."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/40",
+    "reference_selection": "Focused issue #40 coverage for mapMulti declaration extraction instead of side-effecting forEach.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same mapMulti shape."
+  }
+}
diff --git a/evals-reference/40-mapmulti-declaration-extraction/task.md b/evals-reference/40-mapmulti-declaration-extraction/task.md
new file mode 100644
index 0000000..3e57214
--- /dev/null
+++ b/evals-reference/40-mapmulti-declaration-extraction/task.md
@@ -0,0 +1,51 @@
+# Improve declaration extraction
+
+Refactor `DeclarationExtractor.java`. Assume Java 25.
+
+Return the revised Java code only.
+
+```java
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+final class DeclarationExtractor {
+    private static final Pattern LABELED_SOURCE = Pattern.compile("(?i)^source[\\t ]*:[\\t ]*(.*)$");
+
+    static List<Declaration> declarations(Card card) {
+        List<Declaration> declarations = new ArrayList<>();
+        addDeclarations(declarations, card.title());
+        addDeclarations(declarations, card.description());
+        card.comments().stream()
+                .map(Comment::text)
+                .forEach(text -> addDeclarations(declarations, text));
+        return List.copyOf(declarations);
+    }
+
+    private static void addDeclarations(List<Declaration> declarations, String text) {
+        if (text == null || text.isBlank()) {
+            return;
+        }
+        text.lines().forEach(line -> addDeclaration(declarations, line));
+    }
+
+    private static void addDeclaration(List<Declaration> declarations, String line) {
+        Matcher labeled = LABELED_SOURCE.matcher(line);
+        if (labeled.matches()) {
+            declarations.add(new Declaration(labeled.group(1)));
+        }
+    }
+
+    record Card(String title, String description, List<Comment> comments) {}
+    record Comment(String text) {}
+    record Declaration(String value) {}
+}
+```
+
+Each nonblank text block can contain zero or more declaration lines. Keep scanning the title,
+description, and comments, keep matching with `matches()`, and keep the result unmodifiable.
+
+For the line-to-zero-or-one declaration transformation, prefer a Java 25 stream shape using
+`mapMulti` or a small `Consumer` emitter helper rather than a side-effecting `forEach`, `filter`
+plus `map`, or tiny `Stream.of`/`Stream.empty` helpers.
diff --git a/evals-reference/41-tail-stream-equivalence-check/capability.txt b/evals-reference/41-tail-stream-equivalence-check/capability.txt
new file mode 100644
index 0000000..223532b
--- /dev/null
+++ b/evals-reference/41-tail-stream-equivalence-check/capability.txt
@@ -0,0 +1,2 @@
+Move expensive tail work into lazy stream pipelines so allMatch can short-circuit meaningful work
+while preserving conflict behavior.
diff --git a/evals-reference/41-tail-stream-equivalence-check/criteria.json b/evals-reference/41-tail-stream-equivalence-check/criteria.json
new file mode 100644
index 0000000..024b7aa
--- /dev/null
+++ b/evals-reference/41-tail-stream-equivalence-check/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused cleanup: allMatch only short-circuits expensive work when that work stays inside the lazy stream pipeline.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates coherent Java 17 artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Returns complete Java 17-compatible DeclarationConflict code with methods, enum, and records intact."
+    },
+    {
+      "name": "Keeps explicit zero one many branching",
+      "category": "maintainability",
+      "max_score": 14,
+      "description": "Keeps the zero, one, and multiple declaration cases readable, for example with a switch or clear branch structure."
+    },
+    {
+      "name": "Parses first declaration once",
+      "category": "stream_quality",
+      "max_score": 16,
+      "description": "Parses the first declaration once and uses it as the comparison target for later declarations."
+    },
+    {
+      "name": "Moves tail parsing into lazy stream",
+      "category": "stream_quality",
+      "max_score": 24,
+      "description": "Streams declarations after the first with skip(1), maps each remaining declaration through parse inside the pipeline, and allMatch short-circuits on the first invalid or conflicting result."
+    },
+    {
+      "name": "Avoids eager parsed list",
+      "category": "stream_quality",
+      "max_score": 14,
+      "description": "Does not eagerly parse every declaration into a temporary list before calling allMatch."
+    },
+    {
+      "name": "Preserves fail-closed behavior",
+      "category": "safety",
+      "max_score": 16,
+      "description": "Accepts multiple declarations only when the first is selected and every remaining parsed selection is equivalent; blank, invalid, or conflicting declarations still return the conflict result."
+    },
+    {
+      "name": "Avoids noisy manual loop",
+      "category": "maintainability",
+      "max_score": 8,
+      "description": "Does not replace the tail-stream check with a more verbose manual loop unless the loop is clearly simpler and still keeps lazy parsing."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "cleanup",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/39",
+    "reference_selection": "Focused issue #39 coverage for tail-stream equivalence checks and allMatch laziness.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same allMatch shape."
+  }
+}
diff --git a/evals-reference/41-tail-stream-equivalence-check/task.md b/evals-reference/41-tail-stream-equivalence-check/task.md
new file mode 100644
index 0000000..0d87340
--- /dev/null
+++ b/evals-reference/41-tail-stream-equivalence-check/task.md
@@ -0,0 +1,52 @@
+# Avoid eager work before allMatch
+
+Refactor `DeclarationConflict.java`. Assume Java 17.
+
+Return the revised Java code only.
+
+```java
+import java.util.ArrayList;
+import java.util.List;
+
+final class DeclarationConflict {
+    RepositorySourceSelection explicitSource(List<Declaration> declarations) {
+        if (declarations.isEmpty()) {
+            return RepositorySourceSelection.none();
+        }
+        if (declarations.size() == 1) {
+            Declaration declaration = declarations.get(0);
+            return parse(declaration.value(), declaration.mode());
+        }
+        List<RepositorySourceSelection> parsed = new ArrayList<>(declarations.size());
+        for (Declaration declaration : declarations) {
+            parsed.add(parse(declaration.value(), declaration.mode()));
+        }
+        RepositorySourceSelection first = parsed.get(0);
+        if (first.selected()
+                && parsed.stream().allMatch(selection -> equivalent(first, selection))) {
+            return first;
+        }
+        return RepositorySourceSelection.invalid("repository_source_conflict");
+    }
+
+    private RepositorySourceSelection parse(String value, SourceMode mode) {
+        return new RepositorySourceSelection(value, mode, value != null && !value.isBlank());
+    }
+
+    private static boolean equivalent(RepositorySourceSelection expected, RepositorySourceSelection actual) {
+        return actual.selected()
+                && expected.value().equals(actual.value())
+                && expected.mode() == actual.mode();
+    }
+
+    enum SourceMode { REMOTE, LOCAL }
+    record Declaration(String value, SourceMode mode) {}
+    record RepositorySourceSelection(String value, SourceMode mode, boolean selected) {
+        static RepositorySourceSelection none() { return new RepositorySourceSelection("", SourceMode.REMOTE, false); }
+        static RepositorySourceSelection invalid(String code) { return new RepositorySourceSelection(code, SourceMode.REMOTE, false); }
+    }
+}
+```
+
+Multiple declarations are valid only when the first parses to a selected source and every remaining
+declaration parses to an equivalent selected source. Preserve fail-closed conflict behavior.
diff --git a/evals-reference/42-foreach-side-effect-classification/capability.txt b/evals-reference/42-foreach-side-effect-classification/capability.txt
new file mode 100644
index 0000000..d91171c
--- /dev/null
+++ b/evals-reference/42-foreach-side-effect-classification/capability.txt
@@ -0,0 +1,2 @@
+Review stream forEach usage by classifying external mutation, true side-effect terminals, builder
+APIs, logging, and unsafe parallel shared mutation.
diff --git a/evals-reference/42-foreach-side-effect-classification/criteria.json b/evals-reference/42-foreach-side-effect-classification/criteria.json
new file mode 100644
index 0000000..09501dc
--- /dev/null
+++ b/evals-reference/42-foreach-side-effect-classification/criteria.json
@@ -0,0 +1,56 @@
+{
+  "context": "Reference focused review: stream forEach is a trigger for classification, not a blanket ban.",
+  "type": "weighted_checklist",
+  "checklist": [
+    {
+      "name": "Creates concrete review artifact",
+      "category": "safety",
+      "max_score": 8,
+      "description": "Creates review.md or equivalent concrete review output that addresses each marked method."
+    },
+    {
+      "name": "Recommends result-producing displayNames",
+      "category": "stream_quality",
+      "max_score": 22,
+      "description": "Recommends replacing stream().forEach(names::add) with a direct map/filter/toList result, preserving active filtering and immutable copy behavior."
+    },
+    {
+      "name": "Accepts builder side effect with caveats",
+      "category": "stream_quality",
+      "max_score": 16,
+      "description": "Recognizes applyHeaders as a defensible side-effect boundary because mutating the builder is the operation, while still checking ordering, exception behavior, and whether a loop would be clearer."
+    },
+    {
+      "name": "Accepts logging as side-effect purpose",
+      "category": "stream_quality",
+      "max_score": 12,
+      "description": "Does not force logging into a fake collector; explains that terminal side effects such as logging can be acceptable under the debug guard."
+    },
+    {
+      "name": "Rejects unsafe parallel mutation",
+      "category": "safety",
+      "max_score": 20,
+      "description": "Flags the parallelStream().forEach mutation of an int array as unsafe shared mutation and recommends count(), a primitive stream count, or another result owned by the terminal operation or collector."
+    },
+    {
+      "name": "Distinguishes loop from collector fixes",
+      "category": "maintainability",
+      "max_score": 12,
+      "description": "Explains, either as a general principle or through concrete method-by-method rationale, that external API side effects may be clearer as a plain loop while collection-building should normally use a value-producing terminal."
+    },
+    {
+      "name": "Avoids blanket rule",
+      "category": "maintainability",
+      "max_score": 10,
+      "description": "Does not claim every forEach is wrong or every forEach should become a collector, and does not recommend parallelism without measurement."
+    }
+  ],
+  "metadata": {
+    "invocation": "natural",
+    "task_type": "review",
+    "evidence_type": "focused_reference",
+    "issue": "https://github.com/martinfrancois/java-streams-skill/issues/5",
+    "reference_selection": "Focused issue #5 coverage for classifying stream forEach side effects.",
+    "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; the runtime skill already teaches the general forEach classification policy."
+  }
+}
diff --git a/evals-reference/42-foreach-side-effect-classification/task.md b/evals-reference/42-foreach-side-effect-classification/task.md
new file mode 100644
index 0000000..cd5d4f7
--- /dev/null
+++ b/evals-reference/42-foreach-side-effect-classification/task.md
@@ -0,0 +1,54 @@
+# Review stream forEach side effects
+
+Review `ForEachReview.java`. Assume Java 17.
+
+Create `review.md` with concrete recommendations for each marked method. Do not rewrite the whole
+class; classify which `forEach` uses should become result-producing stream operations, which should
+be plain loops, and which can remain terminal side effects with caveats. Include ordering and
+exception-propagation caveats where they matter.
+
+```java
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+final class ForEachReview {
+    private static final Logger LOG = Logger.getLogger(ForEachReview.class.getName());
+
+    List<String> displayNames(List<User> users) {
+        List<String> names = new ArrayList<>();
+        users.stream()
+                .filter(User::active)
+                .map(User::displayName)
+                .forEach(names::add);
+        return List.copyOf(names);
+    }
+
+    void applyHeaders(RequestBuilder builder, Map<String, String> headers) {
+        headers.entrySet().stream()
+                .filter(entry -> !entry.getValue().isBlank())
+                .forEach(entry -> builder.header(entry.getKey(), entry.getValue()));
+    }
+
+    void logDebugProperties(List<String> properties) {
+        if (LOG.isLoggable(java.util.logging.Level.FINE)) {
+            properties.stream().forEach(LOG::fine);
+        }
+    }
+
+    int countErrors(List<Result> results) {
+        int[] count = {0};
+        results.parallelStream()
+                .filter(Result::failed)
+                .forEach(result -> count[0]++);
+        return count[0];
+    }
+
+    record User(String displayName, boolean active) {}
+    record Result(boolean failed) {}
+    interface RequestBuilder {
+        void header(String name, String value);
+    }
+}
+```
diff --git a/evals-reference/NUMBERING.md b/evals-reference/NUMBERING.md
index c1fafa5..d75633b 100644
--- a/evals-reference/NUMBERING.md
+++ b/evals-reference/NUMBERING.md
@@ -29,6 +29,12 @@ the stream should use short glue lambdas or method references while extracted he
 multi-step derivation. Keep it in `evals-reference/` until targeted hosted evidence shows whether it
 belongs in main or regression.
 
+Numbers `29` through `42` were added during the July 2026 open-issue sweep. They cover focused
+reference scenarios for bounded duplicate lookup, findAny audits, immutable/result collection
+boundaries, predicate loops, parser-preserving streams, collector rationale, formatting, identity
+mappers, batched lookup phases, mapMulti extraction, tail allMatch checks, and forEach side-effect
+classification. Keep them in `evals-reference/` until isolated hosted runs classify each scenario.
+
 Number `25` contains the explicit hard-stop scan workflow audit that was demoted from the main eval
 set and later moved to `evals-regression/`. It requires exact skill-provided text, so report it as
 with-context regression coverage rather than as main or reference Java stream reasoning lift.