diff --git a/docs/agents/evals.md b/docs/agents/evals.md index 375ed30..b181085 100644 --- a/docs/agents/evals.md +++ b/docs/agents/evals.md @@ -265,7 +265,11 @@ Update this section whenever active eval membership or scoring changes. bodies into helpers. Its high multi-line-lambda criterion weight is intentional focused behavior-delta coverage, not ordinary broad lift evidence. - Hard-stop scan audits: regression explicit workflow-use only. -- Reference suite: 6 scenarios, 560 total checklist points. Deleted reference number 12 and +- Reference suite: 20 scenarios, 1960 total checklist points. Reference numbers `29` through `42` + cover the open issue sweep for bounded duplicate lookup, findAny audits, immutable/result + collection boundaries, predicate loops, parser-preserving streams, collector rationale, formatting, + identity mappers, batched lookup phases, mapMulti extraction, tail allMatch checks, and forEach + side-effect classification. Deleted reference number 12 and regression-moved scenarios are not counted. - Regression suite: 19 scenarios, 1820 total checklist points. - Hosted benchmark evidence is pending rerun for the current active suite. Do not publish exact diff --git a/evals-reference/29-bounded-duplicate-detection-stream/capability.txt b/evals-reference/29-bounded-duplicate-detection-stream/capability.txt new file mode 100644 index 0000000..4a88324 --- /dev/null +++ b/evals-reference/29-bounded-duplicate-detection-stream/capability.txt @@ -0,0 +1,2 @@ +Refactor duplicate-aware Java stream lookups so they inspect at most two matches while preserving +zero, one, and ambiguous-match behavior. 
diff --git a/evals-reference/29-bounded-duplicate-detection-stream/criteria.json b/evals-reference/29-bounded-duplicate-detection-stream/criteria.json new file mode 100644 index 0000000..d6087f8 --- /dev/null +++ b/evals-reference/29-bounded-duplicate-detection-stream/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused cleanup: duplicate-aware lookup helpers should use bounded stream collection without accepting ambiguous matches.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 21 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns a complete revised ChecklistLookup.java snippet with necessary imports and Java 21-compatible code." + }, + { + "name": "Uses bounded duplicate detection", + "category": "stream_quality", + "max_score": 28, + "description": "Filters matching values, limits the stream to at most two matches, materializes only those bounded matches, and branches on zero, one, or ambiguous matches." + }, + { + "name": "Rejects findFirst shortcut", + "category": "stream_quality", + "max_score": 18, + "description": "Does not replace the loop with findFirst, findAny, or an equivalent first-match shortcut that would silently accept duplicate matches." + }, + { + "name": "Shares the repeated branch carefully", + "category": "maintainability", + "max_score": 16, + "description": "Extracts a small generic helper for the shared zero/one/ambiguous branch, or otherwise removes meaningful duplication without hiding the domain-specific predicate, error code, or message." + }, + { + "name": "Preserves matching behavior", + "category": "safety", + "max_score": 12, + "description": "Keeps Objects.equals-style null-safe matching for checklist names and item text, preserves input encounter order for the single returned match, and still returns null when no match exists." 
+ }, + { + "name": "Preserves exceptions", + "category": "safety", + "max_score": 10, + "description": "Keeps the stable TrelloException type, duplicate error codes, and duplicate messages for both helper methods." + }, + { + "name": "Avoids over-engineering", + "category": "maintainability", + "max_score": 8, + "description": "Does not introduce broad lookup frameworks, caches, parallel streams, or unrelated API changes." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/53", + "reference_selection": "Focused issue #53 coverage for bounded duplicate-detection stream lookups.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape." + } +} diff --git a/evals-reference/29-bounded-duplicate-detection-stream/task.md b/evals-reference/29-bounded-duplicate-detection-stream/task.md new file mode 100644 index 0000000..4c46c6b --- /dev/null +++ b/evals-reference/29-bounded-duplicate-detection-stream/task.md @@ -0,0 +1,69 @@ +# Refactor duplicate-aware lookups + +Refactor `ChecklistLookup.java` with a stream-based implementation. Assume Java 21. + +Return the revised Java code only. 
+ +```java +import java.util.List; +import java.util.Objects; + +final class ChecklistLookup { + static Card.Checklist singleChecklistByName(List checklists, String checklistName) { + Card.Checklist match = null; + for (Card.Checklist checklist : checklists) { + if (!Objects.equals(checklist.name(), checklistName)) { + continue; + } + if (match != null) { + throw new TrelloException( + "trello_checklist_ambiguous", + "Multiple Trello checklists match the requested checklist_name."); + } + match = checklist; + } + return match; + } + + static Card.ChecklistItem singleCheckItemByName(Card.Checklist checklist, String itemName) { + Card.ChecklistItem match = null; + for (Card.ChecklistItem item : checklist.items()) { + if (!Objects.equals(item.text(), itemName)) { + continue; + } + if (match != null) { + throw new TrelloException( + "trello_check_item_ambiguous", + "Multiple Trello checklist items match the requested item_name."); + } + match = item; + } + return match; + } + + record Card(List checklists) { + record Checklist(String name, List items) {} + record ChecklistItem(String text) {} + } + + static final class TrelloException extends RuntimeException { + private final String code; + + TrelloException(String code, String message) { + super(message); + this.code = code; + } + + String code() { + return code; + } + } +} +``` + +Preserve null-safe name matching, no-match `null` behavior, encounter order for the single returned +match, and the existing exception codes and messages. The lookup only needs to distinguish zero +matches, exactly one match, and at least two matches, so do not scan or retain matches after +ambiguity is already proven. If both lookup methods need the same zero, one, or ambiguous branch, +extract that branch into a small shared helper while keeping the predicates and error contracts +domain-specific. Keep the code small. 
diff --git a/evals-reference/30-prefer-findany-equivalent-matches/capability.txt b/evals-reference/30-prefer-findany-equivalent-matches/capability.txt new file mode 100644 index 0000000..931c377 --- /dev/null +++ b/evals-reference/30-prefer-findany-equivalent-matches/capability.txt @@ -0,0 +1,2 @@ +Audit Optional-returning stream terminals and choose findAny only when encounter order is not part +of the result contract. diff --git a/evals-reference/30-prefer-findany-equivalent-matches/criteria.json b/evals-reference/30-prefer-findany-equivalent-matches/criteria.json new file mode 100644 index 0000000..d6c69ed --- /dev/null +++ b/evals-reference/30-prefer-findany-equivalent-matches/criteria.json @@ -0,0 +1,50 @@ +{ + "context": "Reference focused cleanup: findAny should express equivalent-match lookups while findFirst remains for ordered contracts.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns revised Java 17-compatible LookupTerminals code with imports, methods, helper, and record intact." + }, + { + "name": "Uses findAny for equivalent matches", + "category": "stream_quality", + "max_score": 30, + "description": "Changes exact or normalized configured-name lookups to findAny because all valid matches are equivalent or expected to be unique by contract." + }, + { + "name": "Preserves ordered first-match contracts", + "category": "stream_quality", + "max_score": 24, + "description": "Keeps findFirst for PATH-style search order and first output line selection where encounter order selects the result." + }, + { + "name": "Explains retained findFirst calls", + "category": "maintainability", + "max_score": 12, + "description": "Adds concise comments or equivalent explanation for each retained findFirst call that identify the order contract rather than relying on current sequential behavior." 
+ }, + { + "name": "Avoids mechanical replacement", + "category": "safety", + "max_score": 14, + "description": "Does not replace every findFirst mechanically, does not use findAny for fallback or first-line behavior, and does not claim tests alone prove order irrelevance." + }, + { + "name": "Keeps filters and normalization", + "category": "safety", + "max_score": 12, + "description": "Preserves case-insensitive list matching, closed-list filtering, normalization, path resolution, and output-line filtering." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/51", + "reference_selection": "Focused issue #51 coverage for findAny versus findFirst semantic audits.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same audit shape." + } +} diff --git a/evals-reference/30-prefer-findany-equivalent-matches/task.md b/evals-reference/30-prefer-findany-equivalent-matches/task.md new file mode 100644 index 0000000..13fdd00 --- /dev/null +++ b/evals-reference/30-prefer-findany-equivalent-matches/task.md @@ -0,0 +1,49 @@ +# Audit Optional stream terminals + +Refactor `LookupTerminals.java` only where the terminal operation's contract is clearer. Assume Java 17. + +Return the revised Java code and one brief comment beside each retained `findFirst()` explaining +why the first match is semantically required. 
+ +```java +import java.nio.file.Path; +import java.util.List; +import java.util.Locale; +import java.util.Optional; + +final class LookupTerminals { + static Optional detectedList(List openListNames, String expectedName) { + return openListNames.stream() + .filter(name -> name.equalsIgnoreCase(expectedName)) + .findFirst(); + } + + static Optional targetList(List lists, String configuredName) { + String expected = normalize(configuredName); + return lists.stream() + .filter(list -> !list.closed()) + .filter(list -> normalize(list.name()).equals(expected)) + .findFirst(); + } + + static Optional firstExistingPath(List searchPath, String commandName) { + return searchPath.stream() + .map(path -> path.resolve(commandName)) + .filter(path -> path.toFile().exists()) + .findFirst(); + } + + static Optional firstVersionLine(String output) { + return output.lines() + .map(String::stripLeading) + .filter(line -> line.startsWith("java ")) + .findFirst(); + } + + private static String normalize(String value) { + return value.toLowerCase(Locale.ROOT).replaceAll("\\s+", " ").strip(); + } + + record BoardList(String id, String name, boolean closed) {} +} +``` diff --git a/evals-reference/31-immutable-result-append-list/capability.txt b/evals-reference/31-immutable-result-append-list/capability.txt new file mode 100644 index 0000000..d94ef80 --- /dev/null +++ b/evals-reference/31-immutable-result-append-list/capability.txt @@ -0,0 +1,2 @@ +Replace simple temporary append buffers with direct stream-owned immutable results while keeping +mutable builders where they remain clearer. 
diff --git a/evals-reference/31-immutable-result-append-list/criteria.json b/evals-reference/31-immutable-result-append-list/criteria.json new file mode 100644 index 0000000..c725c3f --- /dev/null +++ b/evals-reference/31-immutable-result-append-list/criteria.json @@ -0,0 +1,50 @@ +{ + "context": "Reference focused cleanup: a temporary mutable append list can become a direct immutable result when mutability is not part of the method contract.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible ManifestUpdate code with imports, records, methods, and constructor behavior intact." + }, + { + "name": "Replaces simple append buffer", + "category": "stream_quality", + "max_score": 28, + "description": "Refactors withBoard to produce filtered existing boards plus the new board directly, for example with Stream.concat and Stream.of, instead of creating a mutable append buffer." + }, + { + "name": "Preserves encounter order", + "category": "safety", + "max_score": 18, + "description": "Keeps all retained existing boards in original order and appends the new board after them." + }, + { + "name": "Audits result mutability", + "category": "stream_quality", + "max_score": 14, + "description": "Recognizes that the manifest constructor copies input, so the temporary list mutability is not part of the public result contract." + }, + { + "name": "Keeps complex builder when clearer", + "category": "maintainability", + "max_score": 16, + "description": "Does not force withOptionalSections into a dense stream when the conditional append and optional summary row are clearer as a small builder or loop." + }, + { + "name": "Avoids unrelated changes", + "category": "safety", + "max_score": 16, + "description": "Does not change sameBoardOrWorkflow semantics, archive filtering, record fields, constructor copying, or method signatures." 
+ } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/50", + "reference_selection": "Focused issue #50 coverage for immutable result production over temporary append lists.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape." + } +} diff --git a/evals-reference/31-immutable-result-append-list/task.md b/evals-reference/31-immutable-result-append-list/task.md new file mode 100644 index 0000000..0413a6f --- /dev/null +++ b/evals-reference/31-immutable-result-append-list/task.md @@ -0,0 +1,51 @@ +# Remove unnecessary temporary mutability + +Refactor `ManifestUpdate.java` where doing so improves readability without changing behavior. +Assume Java 17. + +Return the revised Java code only. + +```java +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +record ConnectedBoardManifest(List boards) { + ConnectedBoardManifest { + boards = List.copyOf(boards); + } + + ConnectedBoardManifest withBoard(ConnectedBoard board) { + List updated = new ArrayList<>(boards.stream() + .filter(existing -> !sameBoardOrWorkflow(existing, board)) + .toList()); + updated.add(board); + return new ConnectedBoardManifest(updated); + } + + ConnectedBoardManifest withOptionalSections(List selected, boolean includeArchived) { + List updated = new ArrayList<>(); + for (ConnectedBoard board : selected) { + if (!board.archived() || includeArchived) { + updated.add(board); + } + } + if (includeArchived) { + updated.add(new ConnectedBoard("archive-summary", null, true)); + } + return new ConnectedBoardManifest(updated); + } + + private static boolean sameBoardOrWorkflow(ConnectedBoard left, ConnectedBoard right) { + return left.boardId().equals(right.boardId()) + || left.workflowPath() != null && 
left.workflowPath().equals(right.workflowPath()); + } +} + +record ConnectedBoard(String boardId, String workflowPath, boolean archived) {} +``` + +The manifest constructor copies its input. Preserve encounter order, filtering, duplicate handling, +and public API shape. Only refactor the simple temporary append-buffer case when the stream result +stays readable; leave the conditional builder method imperative if the current loop is clearer than a +dense stream expression. diff --git a/evals-reference/32-predicate-loop-any-match/capability.txt b/evals-reference/32-predicate-loop-any-match/capability.txt new file mode 100644 index 0000000..23c7c02 --- /dev/null +++ b/evals-reference/32-predicate-loop-any-match/capability.txt @@ -0,0 +1,2 @@ +Replace pure predicate loops with anyMatch while keeping side effects, diagnostics, and indexes out +of stream pipelines. diff --git a/evals-reference/32-predicate-loop-any-match/criteria.json b/evals-reference/32-predicate-loop-any-match/criteria.json new file mode 100644 index 0000000..b55c2b7 --- /dev/null +++ b/evals-reference/32-predicate-loop-any-match/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused cleanup: pure predicate loops can become anyMatch while side-effecting or index-sensitive loops should remain imperative.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible ManifestChecks code with necessary imports and all methods present." + }, + { + "name": "Uses anyMatch for boolean predicate", + "category": "stream_quality", + "max_score": 24, + "description": "Refactors hasNonObjectBoardRow to use StreamSupport.stream(..., false).anyMatch(...) or an equivalent short-circuiting stream over the JsonNode iterable." 
+ }, + { + "name": "Keeps terminal warning side effect outside stream", + "category": "stream_quality", + "max_score": 18, + "description": "Computes whether an invalid root exists with anyMatch, then adds exactly one warning outside the stream instead of mutating warnings inside the stream pipeline." + }, + { + "name": "Preserves JsonNode behavior", + "category": "safety", + "max_score": 16, + "description": "Keeps null roots as no-op, non-array roots as the array warning, and non-textual or blank values as the non-blank-strings warning." + }, + { + "name": "Rejects collector and forEach workarounds", + "category": "stream_quality", + "max_score": 14, + "description": "Does not collect invalid roots just to check emptiness and does not use forEach with an external boolean holder or warning mutation." + }, + { + "name": "Preserves index-sensitive loop", + "category": "maintainability", + "max_score": 12, + "description": "Keeps writeRows as a clear loop or otherwise preserves the index-specific output without forcing a noisy stream rewrite." + }, + { + "name": "Explains terminal choice through code", + "category": "maintainability", + "max_score": 8, + "description": "Uses anyMatch for existence questions rather than findAny or findFirst when the required result is a boolean." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/49", + "reference_selection": "Focused issue #49 coverage for predicate-only loops as anyMatch.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape." 
+ } +} diff --git a/evals-reference/32-predicate-loop-any-match/task.md b/evals-reference/32-predicate-loop-any-match/task.md new file mode 100644 index 0000000..adc50db --- /dev/null +++ b/evals-reference/32-predicate-loop-any-match/task.md @@ -0,0 +1,49 @@ +# Clean up predicate-only loops + +Refactor `ManifestChecks.java` where a stream terminal makes the intent clearer. Assume Java 17. + +Return the revised Java code only. + +```java +import com.fasterxml.jackson.databind.JsonNode; +import java.util.List; + +final class ManifestChecks { + static boolean hasNonObjectBoardRow(JsonNode root) { + JsonNode boards = root.path("boards"); + for (JsonNode board : boards) { + if (!board.isObject()) { + return true; + } + } + return false; + } + + static void requireWritableRoots(JsonNode board, String label, List warnings) { + JsonNode roots = board.get("additionalWritableRoots"); + if (roots == null) { + return; + } + if (!roots.isArray()) { + warnings.add("Entry " + label + " field additionalWritableRoots must be an array."); + return; + } + for (JsonNode root : roots) { + if (!root.isTextual() || root.asText().isBlank()) { + warnings.add("Entry " + label + " field additionalWritableRoots must contain non-blank strings."); + return; + } + } + } + + static void writeRows(List rows, JsonNode boards) { + int index = 0; + for (JsonNode board : boards) { + rows.add(index + ":" + board.path("name").asText()); + index++; + } + } +} +``` + +Preserve warning text, short-circuit behavior, and index-sensitive row output. diff --git a/evals-reference/33-final-collection-boundary/capability.txt b/evals-reference/33-final-collection-boundary/capability.txt new file mode 100644 index 0000000..e5c505b --- /dev/null +++ b/evals-reference/33-final-collection-boundary/capability.txt @@ -0,0 +1,2 @@ +Choose direct final collection results when streams own the output, and keep explicit accumulator +boundaries when mutation is the real operation. 
diff --git a/evals-reference/33-final-collection-boundary/criteria.json b/evals-reference/33-final-collection-boundary/criteria.json new file mode 100644 index 0000000..e36af64 --- /dev/null +++ b/evals-reference/33-final-collection-boundary/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused cleanup: choose between stream-owned final collections and honest mutable accumulator boundaries.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible ReservationDiscovery code with imports, methods, record, and interface intact." + }, + { + "name": "Collects final set directly", + "category": "stream_quality", + "max_score": 26, + "description": "Refactors localWorkflowFilePortReservations so the Files.list stream directly returns the final Set result instead of collecting to a temporary list and copying it with addAll." + }, + { + "name": "Uses explicit immutable or set collector intent", + "category": "stream_quality", + "max_score": 14, + "description": "Chooses a set-producing terminal or collector whose mutability/order semantics are audited, rather than using toList only to copy into a HashSet." + }, + { + "name": "Preserves IO behavior", + "category": "safety", + "max_score": 16, + "description": "Still returns an empty set for null config dirs, non-directories, or IOException while listing files, and keeps the same file filters and Optional flattening." + }, + { + "name": "Keeps accumulator boundary honest", + "category": "stream_quality", + "max_score": 18, + "description": "Does not keep or introduce toList plus addAll in extendCleanupList when the temporary list only feeds the existing mutable cleanup accumulator; uses a direct append boundary or a clear loop." 
+ }, + { + "name": "Avoids forEach misuse", + "category": "maintainability", + "max_score": 10, + "description": "Does not replace a result-producing helper with stream().forEach(set::add) when returning a final collection would be clearer." + }, + { + "name": "No unrelated behavior changes", + "category": "safety", + "max_score": 8, + "description": "Keeps best-effort cleanup exception handling, duplicate collapsing for set results, and existing method signatures." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/48", + "reference_selection": "Focused issue #48 coverage for final collection boundaries versus addAll copies.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape." + } +} diff --git a/evals-reference/33-final-collection-boundary/task.md b/evals-reference/33-final-collection-boundary/task.md new file mode 100644 index 0000000..d57c679 --- /dev/null +++ b/evals-reference/33-final-collection-boundary/task.md @@ -0,0 +1,63 @@ +# Clean up final collection boundaries + +Refactor `ReservationDiscovery.java` where a stream can own the final result. Assume Java 17. + +Return the revised Java code only. 
+ +```java +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +final class ReservationDiscovery { + Set localWorkflowFilePortReservations(Path configDir, ConnectedBoard ignoredBoard) { + Set reserved = new HashSet<>(); + if (configDir == null || !Files.isDirectory(configDir)) { + return reserved; + } + try (var files = Files.list(configDir)) { + List workflowPorts = files.filter(Files::isRegularFile) + .filter(file -> file.getFileName().toString().endsWith(".md")) + .filter(file -> ignoredBoard.workflowPath() == null + || !file.equals(ignoredBoard.workflowPath())) + .map(this::serverPort) + .flatMap(Optional::stream) + .toList(); + reserved.addAll(workflowPorts); + } catch (IOException ignored) { + // Leave only manifest and probe checks. + } + return reserved; + } + + void extendCleanupList(List boardIds, Trello trello, String workspaceId, String runId) { + try { + List openDisposableBoardIds = trello.openBoardIdsByNamePrefix(workspaceId, runId).stream() + .filter(boardId -> !boardIds.contains(boardId)) + .toList(); + boardIds.addAll(openDisposableBoardIds); + } catch (RuntimeException ignored) { + // Cleanup is best effort. + } + } + + private Optional serverPort(Path workflow) { + return Optional.empty(); + } + + record ConnectedBoard(Path workflowPath) {} + + interface Trello { + List openBoardIdsByNamePrefix(String workspaceId, String runId); + } +} +``` + +Preserve empty and unreadable-directory behavior. The cleanup list method intentionally extends an +existing mutable accumulator; avoid creating a temporary stream result whose only purpose is to be +copied into that accumulator. 
diff --git a/evals-reference/34-preserve-regex-line-splitting/capability.txt b/evals-reference/34-preserve-regex-line-splitting/capability.txt new file mode 100644 index 0000000..6497f1d --- /dev/null +++ b/evals-reference/34-preserve-regex-line-splitting/capability.txt @@ -0,0 +1,2 @@ +Refactor stream line parsing while preserving regex line-break semantics and avoiding external +mutable accumulation. diff --git a/evals-reference/34-preserve-regex-line-splitting/criteria.json b/evals-reference/34-preserve-regex-line-splitting/criteria.json new file mode 100644 index 0000000..65d5b8a --- /dev/null +++ b/evals-reference/34-preserve-regex-line-splitting/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused cleanup: stream refactors must preserve regex-based line splitting semantics.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible SourceDeclarations code with imports, records, methods, and Pattern usage intact." + }, + { + "name": "Preserves regex line-break contract", + "category": "safety", + "max_score": 24, + "description": "Does not replace split(\"\\\\R\", -1) with String.lines() unless it explicitly preserves the original regex line-break semantics; uses Pattern.splitAsStream or an equivalent regex-preserving source." + }, + { + "name": "Produces declarations through stream result", + "category": "stream_quality", + "max_score": 22, + "description": "Refactors the top-level extraction so streams produce the returned declarations directly rather than using forEach to mutate an external list." + }, + { + "name": "Preserves inputs searched", + "category": "safety", + "max_score": 14, + "description": "Still searches title, description, and every comment text, and still ignores null or blank text blocks." 
+ }, + { + "name": "Preserves matching rule", + "category": "safety", + "max_score": 12, + "description": "Keeps LABELED_SOURCE.matcher(line).matches() semantics and Declaration construction from the captured source value." + }, + { + "name": "Avoids side-effecting stream terminal", + "category": "stream_quality", + "max_score": 12, + "description": "Does not keep text stream processing as text.lines().forEach(...) or another stream terminal that mutates a caller-owned list." + }, + { + "name": "Keeps refactor focused", + "category": "maintainability", + "max_score": 8, + "description": "Does not add broad parser features, change source labels, change public records, or introduce unrelated abstractions." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/47", + "reference_selection": "Focused issue #47 coverage for regex line-splitting preservation during stream refactors.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape." + } +} diff --git a/evals-reference/34-preserve-regex-line-splitting/task.md b/evals-reference/34-preserve-regex-line-splitting/task.md new file mode 100644 index 0000000..fda57fa --- /dev/null +++ b/evals-reference/34-preserve-regex-line-splitting/task.md @@ -0,0 +1,46 @@ +# Preserve parser line splitting + +Refactor `SourceDeclarations.java` to make the declaration extraction more stream-oriented without +changing the parser contract. Assume Java 17. + +Return the revised Java code only. 
+ +```java +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +final class SourceDeclarations { + private static final Pattern LABELED_SOURCE = Pattern.compile("(?i)^source[\\t ]*:[\\t ]*(.*)$"); + + static List declarations(Card card) { + List declarations = new ArrayList<>(); + addDeclarations(declarations, card.title()); + addDeclarations(declarations, card.description()); + card.comments().stream() + .map(Comment::text) + .forEach(text -> addDeclarations(declarations, text)); + return List.copyOf(declarations); + } + + private static void addDeclarations(List declarations, String text) { + if (text == null || text.isBlank()) { + return; + } + for (String line : text.split("\\R", -1)) { + Matcher labeled = LABELED_SOURCE.matcher(line); + if (labeled.matches()) { + declarations.add(new Declaration(labeled.group(1))); + } + } + } + + record Card(String title, String description, List comments) {} + record Comment(String text) {} + record Declaration(String value) {} +} +``` + +The existing parser recognizes Java regex `\R` line breaks. Preserve null and blank text behavior, +title/description/comment coverage, and unmodifiable returned-list behavior. diff --git a/evals-reference/35-to-collection-rationale/capability.txt b/evals-reference/35-to-collection-rationale/capability.txt new file mode 100644 index 0000000..645abab --- /dev/null +++ b/evals-reference/35-to-collection-rationale/capability.txt @@ -0,0 +1,2 @@ +Audit Collectors.toCollection calls and keep concrete collection types only when mutability, +membership, or order semantics require them. 
diff --git a/evals-reference/35-to-collection-rationale/criteria.json b/evals-reference/35-to-collection-rationale/criteria.json new file mode 100644 index 0000000..31972d8 --- /dev/null +++ b/evals-reference/35-to-collection-rationale/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused cleanup: concrete toCollection collectors should be justified or simplified based on collection semantics.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 21 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 21-compatible CollectorChoices code with imports, records, and methods intact." + }, + { + "name": "Keeps mutable collector with rationale", + "category": "stream_quality", + "max_score": 18, + "description": "Keeps the ArrayList collection for terminalCards only because the result is appended to before the immutable copy, and makes that mutability reason clear with a concise comment or equivalent code structure. Award full credit when the rationale is visible near the collector or later mutation; do not penalize harmless comment placement." + }, + { + "name": "Simplifies unnecessary HashSet collector", + "category": "stream_quality", + "max_score": 20, + "description": "Simplifies archivedListIds when the concrete HashSet implementation is not required, while avoiding ambiguous or policy-violating collector choices if the surrounding codebase requires explicit mutability." + }, + { + "name": "Preserves order-preserving de-duplication", + "category": "stream_quality", + "max_score": 18, + "description": "Keeps LinkedHashSet or an equivalent encounter-order-preserving set for unconnectedWorkflowPaths, because diagnostics should de-duplicate while preserving encounter order." 
+ }, + { + "name": "Documents concrete type reasons", + "category": "maintainability", + "max_score": 16, + "description": "Adds concise comments or code structure that explains mutability, membership, or encounter-order reasons instead of restating the collector syntax. Award full credit when the reason is clear even if the comment is placed just after the collector expression." + }, + { + "name": "Avoids mechanical replacement", + "category": "safety", + "max_score": 12, + "description": "Does not replace every toCollection mechanically with toSet, toList, or stream().forEach(...) without checking mutability and order behavior." + }, + { + "name": "Keeps behavior unchanged", + "category": "safety", + "max_score": 8, + "description": "Preserves archive-summary append behavior, closed-list filtering, selected-path filtering, and method signatures." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/46", + "reference_selection": "Focused issue #46 coverage for explaining or simplifying Collectors.toCollection choices.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same shape." + } +} diff --git a/evals-reference/35-to-collection-rationale/task.md b/evals-reference/35-to-collection-rationale/task.md new file mode 100644 index 0000000..9e6ba84 --- /dev/null +++ b/evals-reference/35-to-collection-rationale/task.md @@ -0,0 +1,43 @@ +# Clarify concrete collector choices + +Review and clean up `CollectorChoices.java`. Assume Java 21. + +Return the revised Java code only. Keep concrete collection collectors only where the concrete type +is part of the method's behavior. 
When a later operation requires mutability or encounter-order +preserving de-duplication, keep that concrete collector and make the reason visible in the code. + +```java +import java.util.ArrayList; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.SequencedSet; +import java.util.Set; +import java.util.stream.Collectors; + +final class CollectorChoices { + static List terminalCards(List boardCards) { + List normalized = boardCards.stream() + .filter(Card::terminal) + .collect(Collectors.toCollection(ArrayList::new)); + normalized.add(new Card("archive-summary", true)); + return List.copyOf(normalized); + } + + static Set archivedListIds(List lists) { + return lists.stream() + .filter(BoardList::closed) + .map(BoardList::id) + .collect(Collectors.toCollection(HashSet::new)); + } + + static SequencedSet unconnectedWorkflowPaths(List reported, Set selected) { + return reported.stream() + .filter(path -> !selected.contains(path)) + .collect(Collectors.toCollection(LinkedHashSet::new)); + } + + record Card(String id, boolean terminal) {} + record BoardList(String id, boolean closed) {} +} +``` diff --git a/evals-reference/36-wrap-multi-operation-stream-chains/capability.txt b/evals-reference/36-wrap-multi-operation-stream-chains/capability.txt new file mode 100644 index 0000000..f516252 --- /dev/null +++ b/evals-reference/36-wrap-multi-operation-stream-chains/capability.txt @@ -0,0 +1,2 @@ +Format multi-operation Java stream chains so each operation after stream() is scannable without +changing stream semantics. 
diff --git a/evals-reference/36-wrap-multi-operation-stream-chains/criteria.json b/evals-reference/36-wrap-multi-operation-stream-chains/criteria.json new file mode 100644 index 0000000..f559764 --- /dev/null +++ b/evals-reference/36-wrap-multi-operation-stream-chains/criteria.json @@ -0,0 +1,50 @@ +{ + "context": "Reference focused cleanup: multi-operation stream chains should be wrapped for readability without semantic changes.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible StreamFormattingSample code with imports, methods, and records intact." + }, + { + "name": "Wraps multi-operation chains", + "category": "maintainability", + "max_score": 34, + "description": "Moves each operation after stream() to its own continuation line for chains with more than one operation after the source stream call." + }, + { + "name": "Keeps stream on source line", + "category": "maintainability", + "max_score": 18, + "description": "Keeps .stream() attached to the source expression rather than moving it to a separate line." + }, + { + "name": "Handles single-operation chain proportionately", + "category": "maintainability", + "max_score": 10, + "description": "Leaves the one-operation labels.stream().findFirst() chain on one line or wraps it coherently; does not apply unrelated API churn or move .stream() onto a standalone line just to satisfy the multi-operation wrapping rule." + }, + { + "name": "Preserves stream behavior", + "category": "safety", + "max_score": 18, + "description": "Does not change terminal operations, collectors, mapping behavior, encounter order, fallback value, or map keys while formatting." 
+ }, + { + "name": "Avoids style-only API churn", + "category": "safety", + "max_score": 12, + "description": "Does not introduce different collectors, helper methods, parallel streams, or unrelated imports while making a formatting-only change." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/45", + "reference_selection": "Focused issue #45 coverage for readable multi-operation stream-chain wrapping.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same style rule." + } +} diff --git a/evals-reference/36-wrap-multi-operation-stream-chains/task.md b/evals-reference/36-wrap-multi-operation-stream-chains/task.md new file mode 100644 index 0000000..8ff7802 --- /dev/null +++ b/evals-reference/36-wrap-multi-operation-stream-chains/task.md @@ -0,0 +1,44 @@ +# Format stream chains + +Format `StreamFormattingSample.java` for readability without changing behavior. Assume Java 17. + +Return the revised Java code only. 
+ +```java +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +final class StreamFormattingSample { + Set normalizedLabels(Card card) { + return card.labels().stream().map(StreamFormattingSample::normalize).collect(Collectors.toSet()); + } + + String firstLabel(Card card) { + return card.labels().stream().findFirst().orElse("none"); + } + + void addCardFields(Card card, java.util.Map values) { + values.put("checklists", card.checklists().stream().map(Checklist::asMap).toList()); + values.put("attachments", card.attachments().stream().map(Attachment::asMap).toList()); + } + + private static String normalize(String value) { + return value.toLowerCase(java.util.Locale.ROOT).strip(); + } + + record Card(List labels, List checklists, List attachments) {} + record Checklist(String name) { + java.util.Map asMap() { + return java.util.Map.of("name", name); + } + } + record Attachment(String name) { + java.util.Map asMap() { + return java.util.Map.of("name", name); + } + } +} +``` + +Keep `.stream()` on the source line. One-operation chains may stay on one line when readable. diff --git a/evals-reference/37-to-map-function-identity-mapper/capability.txt b/evals-reference/37-to-map-function-identity-mapper/capability.txt new file mode 100644 index 0000000..a701193 --- /dev/null +++ b/evals-reference/37-to-map-function-identity-mapper/capability.txt @@ -0,0 +1,2 @@ +Use Function.identity for true identity mappers in toMap while preserving merge functions, map +suppliers, and non-identity mappings. 
diff --git a/evals-reference/37-to-map-function-identity-mapper/criteria.json b/evals-reference/37-to-map-function-identity-mapper/criteria.json new file mode 100644 index 0000000..0316c8e --- /dev/null +++ b/evals-reference/37-to-map-function-identity-mapper/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused cleanup: identity key and value mappers in Collectors.toMap should use Function.identity without changing collector semantics.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible StateIndexes code and adds any required java.util.function.Function import." + }, + { + "name": "Uses Function.identity for identity key mapper", + "category": "stream_quality", + "max_score": 20, + "description": "Replaces state -> state with Function.identity() in the normalized state counting collector." + }, + { + "name": "Uses Function.identity for identity value mapper", + "category": "stream_quality", + "max_score": 20, + "description": "Replaces card -> card with Function.identity() in the Card::id indexing collector." + }, + { + "name": "Preserves merge functions", + "category": "safety", + "max_score": 16, + "description": "Keeps Integer::sum for duplicate state counts and keeps the left-wins merge function for duplicate card ids." + }, + { + "name": "Preserves map suppliers", + "category": "safety", + "max_score": 14, + "description": "Keeps HashMap::new and LinkedHashMap::new where they were part of the original collector behavior." + }, + { + "name": "Does not over-apply identity", + "category": "stream_quality", + "max_score": 14, + "description": "Does not replace the non-identity card -> card.displayName() mapper with Function.identity() or a custom identity helper." 
+ }, + { + "name": "Keeps downstream result behavior", + "category": "safety", + "max_score": 8, + "description": "Preserves values().stream().toList() behavior, method signatures, record fields, and imports without unrelated collector changes." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/44", + "reference_selection": "Focused issue #44 coverage for Function.identity in toMap identity mappers.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same collector shape." + } +} diff --git a/evals-reference/37-to-map-function-identity-mapper/task.md b/evals-reference/37-to-map-function-identity-mapper/task.md new file mode 100644 index 0000000..c351c16 --- /dev/null +++ b/evals-reference/37-to-map-function-identity-mapper/task.md @@ -0,0 +1,38 @@ +# Clean up toMap identity mappers + +Refactor `StateIndexes.java`. Assume Java 17. + +Return the revised Java code only. 
+ +```java +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +final class StateIndexes { + Map runningCountsByState(List normalizedStates) { + return normalizedStates.stream() + .collect(Collectors.toMap(state -> state, state -> 1, Integer::sum, HashMap::new)); + } + + List dedupeById(List normalized) { + return normalized.stream() + .collect(Collectors.toMap(Card::id, card -> card, (left, right) -> left, LinkedHashMap::new)) + .values() + .stream() + .toList(); + } + + Map displayNameById(List cards) { + return cards.stream() + .collect(Collectors.toMap(Card::id, card -> card.displayName(), (left, right) -> left)); + } + + record Card(String id, String displayName) {} +} +``` + +Preserve duplicate-key merge behavior, explicit map suppliers, and the non-identity display-name +mapper. diff --git a/evals-reference/38-collector-owned-result-foreach-mutation/capability.txt b/evals-reference/38-collector-owned-result-foreach-mutation/capability.txt new file mode 100644 index 0000000..891492c --- /dev/null +++ b/evals-reference/38-collector-owned-result-foreach-mutation/capability.txt @@ -0,0 +1,2 @@ +Replace external stream mutation with result-producing stream terminals and collectors when the +method is building a returned value. 
diff --git a/evals-reference/38-collector-owned-result-foreach-mutation/criteria.json b/evals-reference/38-collector-owned-result-foreach-mutation/criteria.json new file mode 100644 index 0000000..7ed38d6 --- /dev/null +++ b/evals-reference/38-collector-owned-result-foreach-mutation/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused cleanup: stream pipelines that build returned values should use result-producing terminals instead of external mutation.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible WritableRoots code with necessary imports and methods intact." + }, + { + "name": "Uses Stream.concat for ordered sources", + "category": "stream_quality", + "max_score": 20, + "description": "Builds configured and environment root streams and combines them before distinct().toList(), rather than mutating an external ArrayList from forEach." + }, + { + "name": "Preserves ordered duplicate handling", + "category": "safety", + "max_score": 16, + "description": "Keeps configured roots before environment roots and preserves first-occurrence distinct behavior." + }, + { + "name": "Uses collector-owned map accumulation", + "category": "stream_quality", + "max_score": 22, + "description": "Replaces counts.merge inside stream().forEach with Collectors.toMap or an equivalent collector-owned counting result." + }, + { + "name": "Preserves duplicate count merge", + "category": "safety", + "max_score": 14, + "description": "Keeps duplicate normalized states counted with Integer::sum or equivalent addition, and keeps a mutable HashMap-style result when that behavior is preserved." 
+ }, + { + "name": "Rejects cosmetic mutation", + "category": "stream_quality", + "max_score": 12, + "description": "Does not replace forEach mutation with intermediate lists or maps that are immediately copied into another mutable result without improving the boundary." + }, + { + "name": "Keeps helpers focused", + "category": "maintainability", + "max_score": 8, + "description": "Avoids parallel streams, global caches, or broad helper abstractions unrelated to result-producing accumulation." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/42", + "reference_selection": "Focused issue #42 coverage for collector-owned results instead of stream forEach mutation.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same side-effect boundary." + } +} diff --git a/evals-reference/38-collector-owned-result-foreach-mutation/task.md b/evals-reference/38-collector-owned-result-foreach-mutation/task.md new file mode 100644 index 0000000..3b650b8 --- /dev/null +++ b/evals-reference/38-collector-owned-result-foreach-mutation/task.md @@ -0,0 +1,51 @@ +# Replace misleading stream mutation + +Refactor `WritableRoots.java` where stream pipelines should own returned results. Assume Java 17. + +Return the revised Java code only. 
+ +```java +import java.io.File; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Stream; + +final class WritableRoots { + List additionalWritableRoots(Path workflowDirectory, Map codex) { + List roots = new ArrayList<>(); + list(codex, "additional_writable_roots", List.of()).stream() + .map(value -> workflowDirectory.resolve(value)) + .forEach(roots::add); + environmentValue("ADDITIONAL_WRITABLE_ROOTS").stream() + .flatMap(value -> Arrays.stream(value.split(java.util.regex.Pattern.quote(File.pathSeparator)))) + .map(String::trim) + .filter(value -> !value.isBlank()) + .map(workflowDirectory::resolve) + .forEach(roots::add); + return roots.stream().distinct().toList(); + } + + Map runningCountsByState(List normalizedStates) { + Map counts = new HashMap<>(); + normalizedStates.stream().forEach(state -> counts.merge(state, 1, Integer::sum)); + return counts; + } + + private static List list(Map map, String key, List defaultValue) { + Object value = map.get(key); + return value instanceof List values ? values.stream().map(Object::toString).toList() : defaultValue; + } + + private static Optional environmentValue(String name) { + return Optional.empty(); + } +} +``` + +Preserve configured-root precedence over environment roots, first-occurrence duplicate removal, and +mutable `HashMap` result behavior. diff --git a/evals-reference/39-batch-reference-lookup-before-rendering/capability.txt b/evals-reference/39-batch-reference-lookup-before-rendering/capability.txt new file mode 100644 index 0000000..0e55d4f --- /dev/null +++ b/evals-reference/39-batch-reference-lookup-before-rendering/capability.txt @@ -0,0 +1,2 @@ +Separate pure stream extraction, one batched lookup boundary, rendering, and explicit write phases +when refactoring stream-heavy Java code. 
diff --git a/evals-reference/39-batch-reference-lookup-before-rendering/criteria.json b/evals-reference/39-batch-reference-lookup-before-rendering/criteria.json new file mode 100644 index 0000000..ae249b2 --- /dev/null +++ b/evals-reference/39-batch-reference-lookup-before-rendering/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused cleanup: gather lookup IDs across a batch before rendering so helpers do not hide repeated network I/O.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 snippets", + "category": "safety", + "max_score": 8, + "description": "Returns coherent Java 17-compatible revised snippets that preserve the represented class, records, helper methods, and public method shape." + }, + { + "name": "Analyzes cards before lookup", + "category": "stream_quality", + "max_score": 18, + "description": "Creates an analysis or equivalent phase so each card's plan and prompt reference texts are known before card-state lookup happens." + }, + { + "name": "Batches lookup IDs once", + "category": "stream_quality", + "max_score": 24, + "description": "Collects all prerequisite and prompt-reference lookup IDs across the full card batch, applies distinct at the batch boundary, and calls fetchCardStatesByIds once." + }, + { + "name": "Keeps rendering pure", + "category": "maintainability", + "max_score": 14, + "description": "Makes prompt rendering use already available lookup results and avoids helper methods named like renderers that perform hidden network I/O." + }, + { + "name": "Keeps writes explicit", + "category": "stream_quality", + "max_score": 12, + "description": "Keeps checklist synchronization or other externally visible writes outside stream pipelines and visible in an explicit per-card step." 
+ }, + { + "name": "Preserves reference behavior", + "category": "safety", + "max_score": 16, + "description": "Does not drop prompt references, plan references, card order, rendered-reference creation, or checklist sync behavior while separating phases." + }, + { + "name": "Avoids stream-heavy overreach", + "category": "maintainability", + "max_score": 8, + "description": "Does not hide Trello or network side effects in map, peek, collectors, or broad custom frameworks." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/41", + "reference_selection": "Focused issue #41 coverage for batch lookup before rendering.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same phase-separation shape." + } +} diff --git a/evals-reference/39-batch-reference-lookup-before-rendering/task.md b/evals-reference/39-batch-reference-lookup-before-rendering/task.md new file mode 100644 index 0000000..1e22cb5 --- /dev/null +++ b/evals-reference/39-batch-reference-lookup-before-rendering/task.md @@ -0,0 +1,63 @@ +# Separate batch lookup from rendering + +Refactor `ReferenceRenderer.java` to avoid hidden repeated lookups while keeping writes explicit. +Assume Java 17. + +Return the revised Java snippets only. + +```java +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + +final class ReferenceRenderer { + List enrich(List cards, boolean includeReferenceContext) { + Map plans = new LinkedHashMap<>(); + for (Card card : cards) { + plans.put(card.id(), plan(card)); + } + List enriched = new ArrayList<>(); + for (Card card : cards) { + Plan plan = plans.get(card.id()); + List references = + includeReferenceContext ? 
promptReferences(card, plan, Map.of()) : List.of(); + syncChecklist(card, plan); + enriched.add(card.withReferences(references)); + } + return enriched; + } + + private List promptReferences(Card card, Plan plan, Map known) { + Map references = referenceTexts(card, plan); + List missing = references.values().stream() + .map(ReferenceText::lookupId) + .filter(id -> !known.containsKey(id)) + .distinct() + .toList(); + Map lookupResults = new LinkedHashMap<>(known); + lookupResults.putAll(fetchCardStatesByIds(missing)); + return references.values().stream() + .map(reference -> render(reference, lookupResults.get(reference.lookupId()))) + .toList(); + } + + private Plan plan(Card card) { return new Plan(card.references()); } + private Map referenceTexts(Card card, Plan plan) { return Map.of(); } + private Map fetchCardStatesByIds(List ids) { return Map.of(); } + private RenderedReference render(ReferenceText reference, LookupResult result) { return new RenderedReference(); } + private void syncChecklist(Card card, Plan plan) {} + + record Card(String id, List references) { + Card withReferences(List references) { return this; } + } + record Plan(List references) {} + record ReferenceText(String key, String lookupId) {} + record LookupResult(String state) {} + record RenderedReference() {} +} +``` + +The lookup method is a network boundary. Keep checklist synchronization explicit and outside stream +pipelines. diff --git a/evals-reference/40-mapmulti-declaration-extraction/capability.txt b/evals-reference/40-mapmulti-declaration-extraction/capability.txt new file mode 100644 index 0000000..44428a7 --- /dev/null +++ b/evals-reference/40-mapmulti-declaration-extraction/capability.txt @@ -0,0 +1,2 @@ +Refactor Java 25 declaration extraction so streams produce the result directly and use mapMulti for +zero-or-one line emissions. 
diff --git a/evals-reference/40-mapmulti-declaration-extraction/criteria.json b/evals-reference/40-mapmulti-declaration-extraction/criteria.json new file mode 100644 index 0000000..ee0c9a9 --- /dev/null +++ b/evals-reference/40-mapmulti-declaration-extraction/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused cleanup: Java 25 line-to-zero-or-one stream transformations should prefer mapMulti over side-effecting forEach or line-level flatMap when appropriate.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 25 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 25-compatible DeclarationExtractor code with necessary imports, records, methods, and Pattern usage intact." + }, + { + "name": "Produces the result through the pipeline", + "category": "stream_quality", + "max_score": 18, + "description": "Refactors declarations(Card) so the stream pipeline returns the final declarations via toList rather than mutating an external List from forEach." + }, + { + "name": "Uses mapMulti for zero-or-one emission", + "category": "stream_quality", + "max_score": 24, + "description": "Uses mapMulti or an equivalent Consumer-based emitter for the line-to-zero-or-one Declaration transformation, avoiding tiny Stream.of/Stream.empty allocations as the preferred final shape." + }, + { + "name": "Keeps text-block flatMap acceptable", + "category": "stream_quality", + "max_score": 10, + "description": "Uses flatMap only where each text block naturally expands to a stream of declarations, and does not confuse that with the line-level zero-or-one transformation." + }, + { + "name": "Preserves filters and matching", + "category": "safety", + "max_score": 18, + "description": "Filters null and blank text blocks, keeps LABELED_SOURCE.matcher(line).matches(), preserves the captured value, and searches title, description, and comments." 
+ }, + { + "name": "Avoids side-effecting stream terminals", + "category": "stream_quality", + "max_score": 12, + "description": "Does not keep text.lines().forEach(...) or helper methods whose purpose is mutating a caller-owned declarations list." + }, + { + "name": "Keeps implementation readable", + "category": "maintainability", + "max_score": 10, + "description": "Extracts a small emitter helper when useful and avoids custom collectors, large abstractions, or unrelated parser changes." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/40", + "reference_selection": "Focused issue #40 coverage for mapMulti declaration extraction instead of side-effecting forEach.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same mapMulti shape." + } +} diff --git a/evals-reference/40-mapmulti-declaration-extraction/task.md b/evals-reference/40-mapmulti-declaration-extraction/task.md new file mode 100644 index 0000000..3e57214 --- /dev/null +++ b/evals-reference/40-mapmulti-declaration-extraction/task.md @@ -0,0 +1,51 @@ +# Improve declaration extraction + +Refactor `DeclarationExtractor.java` for a Java 25 codebase. Assume Java 25. + +Return the revised Java code only. 
+ +```java +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +final class DeclarationExtractor { + private static final Pattern LABELED_SOURCE = Pattern.compile("(?i)^source[\\t ]*:[\\t ]*(.*)$"); + + static List declarations(Card card) { + List declarations = new ArrayList<>(); + addDeclarations(declarations, card.title()); + addDeclarations(declarations, card.description()); + card.comments().stream() + .map(Comment::text) + .forEach(text -> addDeclarations(declarations, text)); + return List.copyOf(declarations); + } + + private static void addDeclarations(List declarations, String text) { + if (text == null || text.isBlank()) { + return; + } + text.lines().forEach(line -> addDeclaration(declarations, line)); + } + + private static void addDeclaration(List declarations, String line) { + Matcher labeled = LABELED_SOURCE.matcher(line); + if (labeled.matches()) { + declarations.add(new Declaration(labeled.group(1))); + } + } + + record Card(String title, String description, List comments) {} + record Comment(String text) {} + record Declaration(String value) {} +} +``` + +Each nonblank text block can contain zero or more declaration lines. Preserve title, description, +comments, matching with `matches()`, and the unmodifiable result. + +For the line-to-zero-or-one declaration transformation, prefer a Java 25 stream shape using +`mapMulti` or a small `Consumer` emitter helper rather than a side-effecting `forEach`, `filter` +plus `map`, or tiny `Stream.of`/`Stream.empty` helpers. diff --git a/evals-reference/41-tail-stream-equivalence-check/capability.txt b/evals-reference/41-tail-stream-equivalence-check/capability.txt new file mode 100644 index 0000000..223532b --- /dev/null +++ b/evals-reference/41-tail-stream-equivalence-check/capability.txt @@ -0,0 +1,2 @@ +Move expensive tail work into lazy stream pipelines so allMatch can short-circuit meaningful work +while preserving conflict behavior. 
diff --git a/evals-reference/41-tail-stream-equivalence-check/criteria.json b/evals-reference/41-tail-stream-equivalence-check/criteria.json new file mode 100644 index 0000000..024b7aa --- /dev/null +++ b/evals-reference/41-tail-stream-equivalence-check/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused cleanup: allMatch only short-circuits expensive work when that work stays inside the lazy stream pipeline.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible DeclarationConflict code with methods, enum, and records intact." + }, + { + "name": "Keeps explicit zero one many branching", + "category": "maintainability", + "max_score": 14, + "description": "Keeps the zero, one, and multiple declaration cases readable, for example with a switch or clear branch structure." + }, + { + "name": "Parses first declaration once", + "category": "stream_quality", + "max_score": 16, + "description": "Parses the first declaration once and uses it as the comparison target for later declarations." + }, + { + "name": "Moves tail parsing into lazy stream", + "category": "stream_quality", + "max_score": 24, + "description": "Streams declarations after the first with skip(1), maps each remaining declaration through parse inside the pipeline, and allMatch short-circuits on the first invalid or conflicting result." + }, + { + "name": "Avoids eager parsed list", + "category": "stream_quality", + "max_score": 14, + "description": "Does not eagerly parse every declaration into a temporary list before calling allMatch." + }, + { + "name": "Preserves fail-closed behavior", + "category": "safety", + "max_score": 16, + "description": "Accepts multiple declarations only when the first is selected and every remaining parsed selection is equivalent; blank, invalid, or conflicting declarations still return the conflict result." 
+ }, + { + "name": "Avoids noisy manual loop", + "category": "maintainability", + "max_score": 8, + "description": "Does not replace the tail-stream check with a more verbose manual loop unless the loop is clearly simpler and still keeps lazy parsing." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/39", + "reference_selection": "Focused issue #39 coverage for tail-stream equivalence checks and allMatch laziness.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; do not report as ordinary broad lift if runtime references later teach this same allMatch shape." + } +} diff --git a/evals-reference/41-tail-stream-equivalence-check/task.md b/evals-reference/41-tail-stream-equivalence-check/task.md new file mode 100644 index 0000000..0d87340 --- /dev/null +++ b/evals-reference/41-tail-stream-equivalence-check/task.md @@ -0,0 +1,52 @@ +# Avoid eager work before allMatch + +Refactor `DeclarationConflict.java`. Assume Java 17. + +Return the revised Java code only. 
+ +```java +import java.util.ArrayList; +import java.util.List; + +final class DeclarationConflict { + RepositorySourceSelection explicitSource(List declarations) { + if (declarations.isEmpty()) { + return RepositorySourceSelection.none(); + } + if (declarations.size() == 1) { + Declaration declaration = declarations.get(0); + return parse(declaration.value(), declaration.mode()); + } + List parsed = new ArrayList<>(declarations.size()); + for (Declaration declaration : declarations) { + parsed.add(parse(declaration.value(), declaration.mode())); + } + RepositorySourceSelection first = parsed.get(0); + if (first.selected() + && parsed.stream().allMatch(selection -> equivalent(first, selection))) { + return first; + } + return RepositorySourceSelection.invalid("repository_source_conflict"); + } + + private RepositorySourceSelection parse(String value, SourceMode mode) { + return new RepositorySourceSelection(value, mode, value != null && !value.isBlank()); + } + + private static boolean equivalent(RepositorySourceSelection expected, RepositorySourceSelection actual) { + return actual.selected() + && expected.value().equals(actual.value()) + && expected.mode() == actual.mode(); + } + + enum SourceMode { REMOTE, LOCAL } + record Declaration(String value, SourceMode mode) {} + record RepositorySourceSelection(String value, SourceMode mode, boolean selected) { + static RepositorySourceSelection none() { return new RepositorySourceSelection("", SourceMode.REMOTE, false); } + static RepositorySourceSelection invalid(String code) { return new RepositorySourceSelection(code, SourceMode.REMOTE, false); } + } +} +``` + +Multiple declarations are valid only when the first parses to a selected source and every remaining +declaration parses to an equivalent selected source. Preserve fail-closed conflict behavior. 
diff --git a/evals-reference/42-foreach-side-effect-classification/capability.txt b/evals-reference/42-foreach-side-effect-classification/capability.txt new file mode 100644 index 0000000..d91171c --- /dev/null +++ b/evals-reference/42-foreach-side-effect-classification/capability.txt @@ -0,0 +1,2 @@ +Review stream forEach usage by classifying external mutation, true side-effect terminals, builder +APIs, logging, and unsafe parallel shared mutation. diff --git a/evals-reference/42-foreach-side-effect-classification/criteria.json b/evals-reference/42-foreach-side-effect-classification/criteria.json new file mode 100644 index 0000000..09501dc --- /dev/null +++ b/evals-reference/42-foreach-side-effect-classification/criteria.json @@ -0,0 +1,56 @@ +{ + "context": "Reference focused review: stream forEach is a trigger for classification, not a blanket ban.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates concrete review artifact", + "category": "safety", + "max_score": 8, + "description": "Creates review.md or equivalent concrete review output that addresses each marked method." + }, + { + "name": "Recommends result-producing displayNames", + "category": "stream_quality", + "max_score": 22, + "description": "Recommends replacing stream().forEach(names::add) with a direct map/filter/toList result, preserving active filtering and immutable copy behavior." + }, + { + "name": "Accepts builder side effect with caveats", + "category": "stream_quality", + "max_score": 16, + "description": "Recognizes applyHeaders as a defensible side-effect boundary because mutating the builder is the operation, while still checking ordering, exception behavior, and whether a loop would be clearer." + }, + { + "name": "Accepts logging as side-effect purpose", + "category": "stream_quality", + "max_score": 12, + "description": "Does not force logging into a fake collector; explains that terminal side effects such as logging can be acceptable under the debug guard." 
+ }, + { + "name": "Rejects unsafe parallel mutation", + "category": "safety", + "max_score": 20, + "description": "Flags the parallelStream().forEach mutation of an int array as unsafe shared mutation and recommends count(), a primitive stream count, or another collector-owned result." + }, + { + "name": "Distinguishes loop from collector fixes", + "category": "maintainability", + "max_score": 12, + "description": "Explains, either as a general principle or through concrete method-by-method rationale, that external API side effects may be clearer as a plain loop while collection-building should normally use a value-producing terminal." + }, + { + "name": "Avoids blanket rule", + "category": "maintainability", + "max_score": 10, + "description": "Does not claim every forEach is wrong or every forEach should become a collector, and does not recommend parallelism without measurement." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "review", + "evidence_type": "focused_reference", + "issue": "https://github.com/martinfrancois/java-streams-skill/issues/5", + "reference_selection": "Focused issue #5 coverage for classifying stream forEach side effects.", + "runtime_reference_overlap_rationale": "Allowed only as reference-suite focused coverage; the runtime skill already teaches the general forEach classification policy." + } +} diff --git a/evals-reference/42-foreach-side-effect-classification/task.md b/evals-reference/42-foreach-side-effect-classification/task.md new file mode 100644 index 0000000..cd5d4f7 --- /dev/null +++ b/evals-reference/42-foreach-side-effect-classification/task.md @@ -0,0 +1,54 @@ +# Review stream forEach side effects + +Review `ForEachReview.java`. Assume Java 17. + +Create `review.md` with concrete recommendations for each marked method. 
Do not rewrite the whole +class; classify which `forEach` uses should become result-producing stream operations, which should +be plain loops, and which can remain terminal side effects with caveats. Include ordering and +exception-propagation caveats where they matter. + +```java +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +final class ForEachReview { + private static final Logger LOG = Logger.getLogger(ForEachReview.class.getName()); + + List<String> displayNames(List<User> users) { + List<String> names = new ArrayList<>(); + users.stream() + .filter(User::active) + .map(User::displayName) + .forEach(names::add); + return List.copyOf(names); + } + + void applyHeaders(RequestBuilder builder, Map<String, String> headers) { + headers.entrySet().stream() + .filter(entry -> !entry.getValue().isBlank()) + .forEach(entry -> builder.header(entry.getKey(), entry.getValue())); + } + + void logDebugProperties(List<String> properties) { + if (LOG.isLoggable(java.util.logging.Level.FINE)) { + properties.stream().forEach(LOG::fine); + } + } + + int countErrors(List<Result> results) { + int[] count = {0}; + results.parallelStream() + .filter(Result::failed) + .forEach(result -> count[0]++); + return count[0]; + } + + record User(String displayName, boolean active) {} + record Result(boolean failed) {} + interface RequestBuilder { + void header(String name, String value); + } +} +``` diff --git a/evals-reference/NUMBERING.md b/evals-reference/NUMBERING.md index c1fafa5..d75633b 100644 --- a/evals-reference/NUMBERING.md +++ b/evals-reference/NUMBERING.md @@ -29,6 +29,12 @@ the stream should use short glue lambdas or method references while extracted he multi-step derivation. Keep it in `evals-reference/` until targeted hosted evidence shows whether it belongs in main or regression. +Numbers `29` through `42` were added during the July 2026 open-issue sweep. 
They cover focused +reference scenarios for bounded duplicate lookup, findAny audits, immutable/result collection +boundaries, predicate loops, parser-preserving streams, collector rationale, formatting, identity +mappers, batched lookup phases, mapMulti extraction, tail allMatch checks, and forEach side-effect +classification. Keep them in `evals-reference/` until isolated hosted runs classify each scenario. + Number `25` contains the explicit hard-stop scan workflow audit that was demoted from the main eval set and later moved to `evals-regression/`. It requires exact skill-provided text, so report it as with-context regression coverage rather than as main or reference Java stream reasoning lift.