diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index dac15ce..bb60e02 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -55,7 +55,7 @@ Tessl-authenticated checks: - [ ] `bash scripts/check_publish_dry_run.sh .` - [ ] `tessl plugin publish --dry-run --bump patch .` -- [ ] `tessl skill review --threshold 100 skills/java-optionals/SKILL.md`, if skill text or references changed +- [ ] `tessl review run --workspace martinfrancois --threshold 100 skills/java-optionals/SKILL.md`, if skill text or references changed - [ ] Targeted main/reference `scripts/run_eval_suite.sh `, if skill behavior or those evals changed - [ ] Targeted regression `scripts/run_eval_suite.sh regression `, if regression evals changed - [ ] Every substantively changed eval scenario was rerun targeted and reached 100% with context, or the PR explains the Tessl blocker and remaining work @@ -65,7 +65,7 @@ Tessl-authenticated checks: - [ ] `scripts/classify_eval_result.py --scenario-dir `, if a scenario was added or moved between suites - [ ] Full/main `scripts/run_eval_suite.sh main`, if benchmark claims changed or targeted with-context results are clean -`bash scripts/check_publish_dry_run.sh .`, `tessl skill review`, and hosted Tessl evals require +`bash scripts/check_publish_dry_run.sh .`, `tessl review run`, and hosted Tessl evals require Tessl authentication. Hosted evals also require a linked Tessl project. If you can't run one of them, leave it unchecked and explain why in the details. 
diff --git a/.github/workflows/skill-review.yml b/.github/workflows/skill-review.yml index c2dcbbb..6ff5636 100644 --- a/.github/workflows/skill-review.yml +++ b/.github/workflows/skill-review.yml @@ -44,4 +44,4 @@ jobs: - name: Review skill if: ${{ env.TESSL_TOKEN_AVAILABLE == 'true' }} - run: tessl skill review --threshold 100 skills/java-optionals/SKILL.md + run: tessl review run --workspace martinfrancois --threshold 100 skills/java-optionals/SKILL.md diff --git a/docs/agents/evals.md b/docs/agents/evals.md index afd75d0..9e030e4 100644 --- a/docs/agents/evals.md +++ b/docs/agents/evals.md @@ -148,7 +148,11 @@ benchmark claims, or scoring rules. Current active suite structure: - `evals/`: 4 scenarios, 360 checklist points, 3 natural and 1 explicit. -- `evals-reference/`: 46 scenarios, 2470 checklist points, broad candidate and diagnostic coverage. +- `evals-reference/`: 52 scenarios, 3070 checklist points, broad candidate and diagnostic coverage. + Reference numbers `51` through `56` cover the July 2026 open-issue sweep for presence-to-enum + selection, findAny/findFirst Optional terminals, domain selections with lazy fallback, + side-effecting upsert boundaries, ifPresentOrElse rendering branches, and lifecycle Optional + helper boundaries. - `evals-regression/`: 2 scenarios, 200 checklist points, with-context safety coverage. ## Checks diff --git a/docs/agents/workflow.md b/docs/agents/workflow.md index 62ba7a3..8856b30 100644 --- a/docs/agents/workflow.md +++ b/docs/agents/workflow.md @@ -105,7 +105,7 @@ release-readiness. - Run the Tessl skill review at threshold 100 when changing runtime skill content: ```bash - tessl skill review --threshold 100 skills/java-optionals/SKILL.md + tessl review run --workspace martinfrancois --threshold 100 skills/java-optionals/SKILL.md ``` - Pull request titles and commits must use Conventional Commits. 
Release Please uses them to update diff --git a/evals-reference/51-presence-selects-enum-value/capability.txt b/evals-reference/51-presence-selects-enum-value/capability.txt new file mode 100644 index 0000000..612a00a --- /dev/null +++ b/evals-reference/51-presence-selects-enum-value/capability.txt @@ -0,0 +1,2 @@ +Refactor Optional presence checks that only select enum values while preserving side-effecting +branches as explicit code. diff --git a/evals-reference/51-presence-selects-enum-value/criteria.json b/evals-reference/51-presence-selects-enum-value/criteria.json new file mode 100644 index 0000000..debe39d --- /dev/null +++ b/evals-reference/51-presence-selects-enum-value/criteria.json @@ -0,0 +1,54 @@ +{ + "context": "Reference cleanup: Optional presence selecting between enum values should be expressed as Optional value flow when no side effects or checked work are involved.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible BoardSetupChoices code with imports, enum, interfaces, and methods intact." + }, + { + "name": "Uses Optional map for enum selection", + "category": "optional_quality", + "max_score": 30, + "description": "Replaces the dry-run isPresent ternary with options.existingBoardId().map(...).orElse(...) or equivalent Optional value flow." + }, + { + "name": "Preserves ignored present value", + "category": "optional_quality", + "max_score": 12, + "description": "Correctly ignores the contained board id when mapping any present value to BoardSetupChoice.EXISTING." + }, + { + "name": "Uses appropriate eager constant fallback", + "category": "optional_quality", + "max_score": 12, + "description": "Uses orElse for the trivial BoardSetupChoice.NEW constant fallback rather than noisy orElseGet when no work is deferred." 
+ }, + { + "name": "Keeps side-effect branch imperative", + "category": "safety", + "max_score": 16, + "description": "Does not force choiceWithAudit into an Optional chain that hides auditLog.record or changes side-effect ordering." + }, + { + "name": "Avoids Optional antipatterns", + "category": "optional_quality", + "max_score": 14, + "description": "Does not introduce get(), orElseThrow(), orElse(null), fake lists, or a custom helper just to select the enum." + }, + { + "name": "Keeps behavior focused", + "category": "maintainability", + "max_score": 8, + "description": "Does not change enum values, method signatures, rejectNewBoardInProgress calls, or unrelated setup behavior." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "ordinary_lift", + "issue": "https://github.com/martinfrancois/java-optionals-skill/issues/74" + } +} diff --git a/evals-reference/51-presence-selects-enum-value/task.md b/evals-reference/51-presence-selects-enum-value/task.md new file mode 100644 index 0000000..d79bd04 --- /dev/null +++ b/evals-reference/51-presence-selects-enum-value/task.md @@ -0,0 +1,45 @@ +# Clean up enum selection + +Create `BoardSetupChoices.java` with the refactored class. Improve value flow where appropriate. +Assume Java 17. + +Return the complete revised Java code only. + +```java +import java.util.Optional; + +final class BoardSetupChoices { + void rejectDryRunNewBoardInProgress(LocalSetupOptions options) { + BoardSetupChoice dryRunChoice = + options.existingBoardId().isPresent() ? 
BoardSetupChoice.EXISTING : BoardSetupChoice.NEW; + rejectNewBoardInProgress(options, dryRunChoice); + } + + BoardSetupChoice choiceWithAudit(LocalSetupOptions options, AuditLog auditLog) { + if (options.existingBoardId().isPresent()) { + auditLog.record("existing board selected"); + return BoardSetupChoice.EXISTING; + } + return BoardSetupChoice.NEW; + } + + private void rejectNewBoardInProgress(LocalSetupOptions options, BoardSetupChoice choice) {} + + enum BoardSetupChoice { + EXISTING, + NEW + } + + interface LocalSetupOptions { + Optional existingBoardId(); + } + + interface AuditLog { + void record(String message); + } +} +``` + +The present board id value is intentionally ignored in the dry-run choice. Preserve side effects and +enum values. Keep `choiceWithAudit` as an explicit imperative branch; do not hide +`auditLog.record(...)` inside an Optional `map` or other transformation callback. diff --git a/evals-reference/52-findany-equivalent-optional-matches/capability.txt b/evals-reference/52-findany-equivalent-optional-matches/capability.txt new file mode 100644 index 0000000..784a650 --- /dev/null +++ b/evals-reference/52-findany-equivalent-optional-matches/capability.txt @@ -0,0 +1 @@ +Audit Optional-producing stream lookups and choose findAny only when all matches are equivalent. 
diff --git a/evals-reference/52-findany-equivalent-optional-matches/criteria.json b/evals-reference/52-findany-equivalent-optional-matches/criteria.json new file mode 100644 index 0000000..a129cd4 --- /dev/null +++ b/evals-reference/52-findany-equivalent-optional-matches/criteria.json @@ -0,0 +1,48 @@ +{ + "context": "Reference cleanup: Optional-returning stream terminals should use findAny for equivalent matches and preserve findFirst for ordered first-match contracts.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible OptionalLookupTerminals code with imports, helper, methods, and record intact." + }, + { + "name": "Uses findAny for equivalent matches", + "category": "optional_quality", + "max_score": 30, + "description": "Changes exact and normalized configured-name lookups to findAny because the Optional result does not have an encounter-order contract." + }, + { + "name": "Preserves ordered findFirst contracts", + "category": "optional_quality", + "max_score": 24, + "description": "Keeps findFirst for PATH-style executable lookup and first Java version line, where encounter order selects the value." + }, + { + "name": "Explains retained findFirst calls", + "category": "maintainability", + "max_score": 12, + "description": "Adds concise comments or equivalent explanation for retained findFirst calls based on semantic order, not current sequential-stream behavior." + }, + { + "name": "Avoids mechanical terminal changes", + "category": "safety", + "max_score": 14, + "description": "Does not replace every findFirst blindly and does not preserve every findFirst merely because the code already works." 
+ }, + { + "name": "Preserves Optional flattening", + "category": "safety", + "max_score": 12, + "description": "Keeps Optional-returning parsing flattened correctly with Optional::stream or equivalent and does not introduce get(), null, or fake-list handling." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "ordinary_lift", + "issue": "https://github.com/martinfrancois/java-optionals-skill/issues/72" + } +} diff --git a/evals-reference/52-findany-equivalent-optional-matches/task.md b/evals-reference/52-findany-equivalent-optional-matches/task.md new file mode 100644 index 0000000..32f9e58 --- /dev/null +++ b/evals-reference/52-findany-equivalent-optional-matches/task.md @@ -0,0 +1,56 @@ +# Audit Optional stream lookup terminals + +Refactor `OptionalLookupTerminals.java` only where the terminal operation better communicates the +contract. Assume Java 17. + +Return the revised Java code and one brief comment beside each retained `findFirst()` explaining why +the first match is semantically required. 
+ +```java +import java.nio.file.Path; +import java.util.List; +import java.util.Locale; +import java.util.Optional; + +final class OptionalLookupTerminals { + static Optional detectedList(List openListNames, String expectedName) { + return openListNames.stream() + .filter(name -> name.equalsIgnoreCase(expectedName)) + .findFirst(); + } + + static Optional targetList(List lists, String configuredName) { + String expected = normalize(configuredName); + return lists.stream() + .filter(list -> !list.closed()) + .filter(list -> normalize(list.name()).equals(expected)) + .findFirst(); + } + + static Optional firstExecutable(List searchPath, String commandName) { + return searchPath.stream() + .map(path -> path.resolve(commandName)) + .filter(path -> path.toFile().exists()) + .findFirst(); + } + + static Optional firstJavaMajor(String output) { + return output.lines() + .map(String::stripLeading) + .filter(line -> line.startsWith("java ")) + .map(OptionalLookupTerminals::firstInteger) + .flatMap(Optional::stream) + .findFirst(); + } + + private static Optional firstInteger(String value) { + return Optional.empty(); + } + + private static String normalize(String value) { + return value.toLowerCase(Locale.ROOT).replaceAll("\\s+", " ").strip(); + } + + record BoardList(String id, String name, boolean closed) {} +} +``` diff --git a/evals-reference/53-domain-selection-lazy-fallback/capability.txt b/evals-reference/53-domain-selection-lazy-fallback/capability.txt new file mode 100644 index 0000000..dc57af3 --- /dev/null +++ b/evals-reference/53-domain-selection-lazy-fallback/capability.txt @@ -0,0 +1,2 @@ +Build domain selections directly from Optional present and absent branches while keeping expensive +fallbacks lazy and checked prompt boundaries explicit. 
diff --git a/evals-reference/53-domain-selection-lazy-fallback/criteria.json b/evals-reference/53-domain-selection-lazy-fallback/criteria.json new file mode 100644 index 0000000..00e5660 --- /dev/null +++ b/evals-reference/53-domain-selection-lazy-fallback/criteria.json @@ -0,0 +1,54 @@ +{ + "context": "Reference implementation cleanup: Optional selections should build domain objects in one present/absent flow and keep fallback work lazy.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 25 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 25-compatible SetupSelections code with imports, constructor, methods, interfaces, and records intact." + }, + { + "name": "Builds MaxAgentsSelection in Optional flow", + "category": "optional_quality", + "max_score": 22, + "description": "Uses workflowConfig.maxAgents(workflowPath).map(...).orElseGet(...) or equivalent to construct MaxAgentsSelection with value and provenance together." + }, + { + "name": "Preserves explicit override behavior", + "category": "safety", + "max_score": 12, + "description": "Keeps explicit max-agents input winning before workflow fallback and preserves preservedFromWorkflow=false for explicit or default values." + }, + { + "name": "Transforms Codex defaults directly", + "category": "optional_quality", + "max_score": 18, + "description": "Replaces isEmpty plus orElseThrow ordinary value flow with codexModelDefaults().map(...).orElse(boardSetup) or equivalent." + }, + { + "name": "Keeps dotenv fallback lazy", + "category": "optional_quality", + "max_score": 18, + "description": "Uses Optional.or or equivalent lazy fallback so load(dotenv) runs only when no environment value is present." 
+ }, + { + "name": "Preserves checked prompt boundary", + "category": "safety", + "max_score": 12, + "description": "Keeps promptedMaxAgents as a clear checked-IOException branch instead of forcing readLine through a generic Optional helper." + }, + { + "name": "Avoids Optional workarounds", + "category": "optional_quality", + "max_score": 10, + "description": "Does not use orElse(null), fake Optional lists or streams, generic throwing helpers, or repeated isPresent plus value reads for ordinary value flow." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "ordinary_lift", + "issue": "https://github.com/martinfrancois/java-optionals-skill/issues/71" + } +} diff --git a/evals-reference/53-domain-selection-lazy-fallback/task.md b/evals-reference/53-domain-selection-lazy-fallback/task.md new file mode 100644 index 0000000..8efc10e --- /dev/null +++ b/evals-reference/53-domain-selection-lazy-fallback/task.md @@ -0,0 +1,93 @@ +# Build domain selections from Optional values + +Refactor `SetupSelections.java`. Assume Java 25. + +Return the revised Java code only. 
+ +```java +import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Map; +import java.util.Optional; + +final class SetupSelections { + private final WorkflowConfig workflowConfig; + private final BoardSetup boardSetup; + + SetupSelections(WorkflowConfig workflowConfig, BoardSetup boardSetup) { + this.workflowConfig = workflowConfig; + this.boardSetup = boardSetup; + } + + MaxAgentsSelection configureGithubMaxAgents(Options options, Path workflowPath) { + if (options.maxAgentsExplicit()) { + return new MaxAgentsSelection(options.maxAgents(), false); + } + Optional configuredMaxAgents = workflowConfig.maxAgents(workflowPath); + return new MaxAgentsSelection(configuredMaxAgents.orElseGet(options::maxAgents), configuredMaxAgents.isPresent()); + } + + BoardSetup boardSetupWithCodexModel(Options options) { + if (options.codexModelDefaults().isEmpty()) { + return boardSetup; + } + CodexModelDefaults defaults = options.codexModelDefaults().orElseThrow(); + return options.hasExplicitCodexModelRequest() + ? boardSetup.withCodexModelOverrides(defaults, options.codexModel()) + : boardSetup.withCodexModelDefaults(defaults); + } + + public static Optional firstPresent(Path dotenv, Map environment, String... 
names) { + for (String name : names) { + String value = environment.get(name); + if (hasText(value)) { + return Optional.of(value); + } + } + Map dotenvValues = load(dotenv); + for (String name : names) { + String value = dotenvValues.get(name); + if (hasText(value)) { + return Optional.of(value); + } + } + return Optional.empty(); + } + + int promptedMaxAgents(Terminal terminal, MaxAgentsSelection current) throws IOException { + String answer = terminal.readLine("Maximum cards from this board at once [" + current.value() + "]: "); + if (answer == null || answer.isBlank()) { + return current.value(); + } + return Integer.parseInt(answer); + } + + private static boolean hasText(String value) { + return value != null && !value.isBlank(); + } + + private static Map load(Path dotenv) { + return Map.of(); + } + + interface WorkflowConfig { Optional maxAgents(Path workflowPath); } + interface Options { + boolean maxAgentsExplicit(); + int maxAgents(); + Optional codexModelDefaults(); + boolean hasExplicitCodexModelRequest(); + String codexModel(); + } + interface Terminal { String readLine(String prompt) throws IOException; } + record MaxAgentsSelection(int value, boolean preservedFromWorkflow) {} + record CodexModelDefaults(String model) {} + record BoardSetup() { + BoardSetup withCodexModelOverrides(CodexModelDefaults defaults, String model) { return this; } + BoardSetup withCodexModelDefaults(CodexModelDefaults defaults) { return this; } + } +} +``` + +Preserve explicit option precedence, workflow preservation provenance, lazy dotenv loading, and the +checked prompt boundary. 
diff --git a/evals-reference/54-optional-upsert-side-effect-boundary/capability.txt b/evals-reference/54-optional-upsert-side-effect-boundary/capability.txt new file mode 100644 index 0000000..b5909da --- /dev/null +++ b/evals-reference/54-optional-upsert-side-effect-boundary/capability.txt @@ -0,0 +1,2 @@ +Refactor Optional upsert code so one present/absent boundary controls side effects while named +helpers own nontrivial present-branch policy. diff --git a/evals-reference/54-optional-upsert-side-effect-boundary/criteria.json b/evals-reference/54-optional-upsert-side-effect-boundary/criteria.json new file mode 100644 index 0000000..8a9837f --- /dev/null +++ b/evals-reference/54-optional-upsert-side-effect-boundary/criteria.json @@ -0,0 +1,54 @@ +{ + "context": "Reference cleanup: Optional upsert logic should have one clear present/absent side-effect boundary and named present-branch policy.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible WaitingCommentSync code with imports, logger, methods, and records intact." + }, + { + "name": "Uses one Optional decision boundary", + "category": "optional_quality", + "max_score": 24, + "description": "Consumes prerequisiteWaitingComment(config, card.id()) once with ifPresentOrElse or an equivalent single present/absent boundary." + }, + { + "name": "Extracts present-branch policy", + "category": "maintainability", + "max_score": 18, + "description": "Moves same-text no-op, blank-id create, and update behavior into a named helper or otherwise makes the present-branch policy clear." + }, + { + "name": "Preserves side-effect behavior", + "category": "safety", + "max_score": 22, + "description": "Keeps absent create, same-text no-op, different-text update when id is nonblank, different-text create when id is blank, and the original add/update arguments." 
+ }, + { + "name": "Keeps exception boundary", + "category": "safety", + "max_score": 10, + "description": "Keeps runtime exception handling around the upsert operation and preserves warning behavior." + }, + { + "name": "Avoids Optional antipatterns", + "category": "optional_quality", + "max_score": 12, + "description": "Does not reuse the same Optional in multiple pipelines, use orElse(null), isPresent/get, generic helpers, or fake collections." + }, + { + "name": "Keeps code focused", + "category": "maintainability", + "max_score": 6, + "description": "Does not redesign logging, comments, config, or repository access beyond clarifying the upsert flow." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "ordinary_lift", + "issue": "https://github.com/martinfrancois/java-optionals-skill/issues/70" + } +} diff --git a/evals-reference/54-optional-upsert-side-effect-boundary/task.md b/evals-reference/54-optional-upsert-side-effect-boundary/task.md new file mode 100644 index 0000000..2015411 --- /dev/null +++ b/evals-reference/54-optional-upsert-side-effect-boundary/task.md @@ -0,0 +1,49 @@ +# Clarify Optional upsert flow + +Refactor `WaitingCommentSync.java`. Assume Java 17. + +Return the revised Java code only. 
+ +```java +import java.util.Optional; +import java.util.logging.Logger; + +final class WaitingCommentSync { + private static final Logger LOG = Logger.getLogger(WaitingCommentSync.class.getName()); + + private void upsertPrerequisiteWaitingComment(Config config, Card card, String text) { + try { + Optional existing = prerequisiteWaitingComment(config, card.id()); + if (existing.map(Comment::text).filter(text::equals).isPresent()) { + return; + } + existing.filter(comment -> !blank(comment.id())) + .ifPresentOrElse( + comment -> updateComment(config, comment.id(), text), + () -> addComment(config, card.id(), text)); + } catch (RuntimeException e) { + LOG.warning("card_id=" + card.id() + " prerequisite_waiting_comment=failed reason=" + e.getMessage()); + } + } + + private Optional prerequisiteWaitingComment(Config config, String cardId) { + return Optional.empty(); + } + + private void updateComment(Config config, String commentId, String text) {} + private void addComment(Config config, String cardId, String text) {} + private static boolean blank(String value) { return value == null || value.isBlank(); } + + record Config() {} + record Card(String id) {} + record Comment(String id, String text) {} +} +``` + +Preserve these outcomes: absent comment creates one; same text does nothing; different text with a +nonblank id updates; different text with a blank id creates one; runtime exceptions are logged at +the upsert boundary. + +Use one Optional present/absent side-effect boundary for the existing comment, avoid +`isPresent()`/`get()`, and put the nontrivial present-comment policy in a named helper or equally +clear method boundary. 
diff --git a/evals-reference/55-side-effecting-ifpresentorelse-lookup/capability.txt b/evals-reference/55-side-effecting-ifpresentorelse-lookup/capability.txt new file mode 100644 index 0000000..7a93a55 --- /dev/null +++ b/evals-reference/55-side-effecting-ifpresentorelse-lookup/capability.txt @@ -0,0 +1,2 @@ +Use ifPresentOrElse for Java side-effecting Optional present and absent branches while preserving +rendering order and checked boundaries. diff --git a/evals-reference/55-side-effecting-ifpresentorelse-lookup/criteria.json b/evals-reference/55-side-effecting-ifpresentorelse-lookup/criteria.json new file mode 100644 index 0000000..ad0cc85 --- /dev/null +++ b/evals-reference/55-side-effecting-ifpresentorelse-lookup/criteria.json @@ -0,0 +1,54 @@ +{ + "context": "Reference cleanup: Java 9+ side-effecting present/empty Optional branches should use ifPresentOrElse when it preserves behavior.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 artifact", + "category": "safety", + "max_score": 8, + "description": "Returns complete Java 17-compatible PrivateContextRenderer code with imports, methods, records, and checked IOException behavior intact." + }, + { + "name": "Uses ifPresentOrElse for side effects", + "category": "optional_quality", + "max_score": 28, + "description": "Replaces isPresent plus orElseThrow with request.lookup().ifPresentOrElse(...) or equivalent single Optional terminal for the present and empty rendering branches." + }, + { + "name": "Preserves lookup-only output", + "category": "safety", + "max_score": 18, + "description": "When lookup is present, renders the common header and lookup section but does not render Local Paths or Workflow Identifiers." + }, + { + "name": "Preserves full-context output", + "category": "safety", + "max_score": 16, + "description": "When lookup is absent, renders Local Paths and Workflow Identifiers in the original order after the common header." 
+ }, + { + "name": "Avoids exception-oriented present read", + "category": "optional_quality", + "max_score": 12, + "description": "Does not use orElseThrow, get, or a repeated lookup() call to retrieve a value already selected by an Optional presence branch." + }, + { + "name": "Avoids fake transformations", + "category": "optional_quality", + "max_score": 10, + "description": "Does not use map, stream, or intermediate collections in a way that hides side effects or changes rendering order." + }, + { + "name": "Keeps refactor focused", + "category": "maintainability", + "max_score": 8, + "description": "Does not redesign diagnostics rendering, hashing, section names, or public records beyond the Optional branch cleanup." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "ordinary_lift", + "issue": "https://github.com/martinfrancois/java-optionals-skill/issues/69" + } +} diff --git a/evals-reference/55-side-effecting-ifpresentorelse-lookup/task.md b/evals-reference/55-side-effecting-ifpresentorelse-lookup/task.md new file mode 100644 index 0000000..bdee53b --- /dev/null +++ b/evals-reference/55-side-effecting-ifpresentorelse-lookup/task.md @@ -0,0 +1,54 @@ +# Clean up lookup rendering branch + +Create `PrivateContextRenderer.java` with the refactored class. Assume Java 17. + +Return the complete revised Java code only. 
+ +```java +import java.io.IOException; +import java.time.Instant; +import java.util.Optional; + +final class PrivateContextRenderer { + private String renderPrivateContext(DiagnosticsRequest request, Optional sharedTokenHasher) + throws IOException { + DiagnosticsContext context = diagnosticsContext(request, sharedTokenHasher); + + StringBuilder body = new StringBuilder(); + body.append("# Private Context\n\n"); + line(body, "time_utc", Instant.now().toString()); + line(body, "command", request.command()); + if (request.lookup().isPresent()) { + section(body, "Lookup"); + appendPrivateContextLookup(body, context, request.lookup().orElseThrow()); + return body.toString(); + } + + section(body, "Local Paths"); + appendLocalPaths(body, context); + section(body, "Workflow Identifiers"); + appendWorkflowIdentifiers(body, context); + return body.toString(); + } + + private DiagnosticsContext diagnosticsContext(DiagnosticsRequest request, Optional hasher) + throws IOException { + return new DiagnosticsContext(); + } + + private static void section(StringBuilder body, String title) { body.append("\\n## ").append(title).append("\\n"); } + private static void line(StringBuilder body, String name, String value) { body.append(name).append(": ").append(value).append("\\n"); } + private static void appendPrivateContextLookup(StringBuilder body, DiagnosticsContext context, String lookup) {} + private static void appendLocalPaths(StringBuilder body, DiagnosticsContext context) {} + private static void appendWorkflowIdentifiers(StringBuilder body, DiagnosticsContext context) {} + + record DiagnosticsRequest(String command, Optional lookup) {} + record DiagnosticsContext() {} + record TokenHasher() {} +} +``` + +When lookup is present, render only the lookup section after the common header. When lookup is absent, +render the full private context sections. Preserve ordering and checked exception behavior. 
+Use `ifPresentOrElse` or an equivalent Optional terminal for the side-effecting present and absent +branches; do not hide rendering mutations inside `map` or another fake transformation. diff --git a/evals-reference/56-optional-helper-trigger-lifecycle-refactor/capability.txt b/evals-reference/56-optional-helper-trigger-lifecycle-refactor/capability.txt new file mode 100644 index 0000000..2e0961a --- /dev/null +++ b/evals-reference/56-optional-helper-trigger-lifecycle-refactor/capability.txt @@ -0,0 +1,2 @@ +Refactor lifecycle validation so Optional selector handling happens at one boundary and the domain +validation helper accepts the selected Path directly. diff --git a/evals-reference/56-optional-helper-trigger-lifecycle-refactor/criteria.json b/evals-reference/56-optional-helper-trigger-lifecycle-refactor/criteria.json new file mode 100644 index 0000000..5e9f2cc --- /dev/null +++ b/evals-reference/56-optional-helper-trigger-lifecycle-refactor/criteria.json @@ -0,0 +1,54 @@ +{ + "context": "Reference natural implementation: lifecycle selector validation should centralize repeated preflight and bind Optional values at the boundary.", + "type": "weighted_checklist", + "checklist": [ + { + "name": "Creates coherent Java 17 snippets", + "category": "safety", + "max_score": 8, + "description": "Returns coherent Java 17-compatible revised snippets with lifecycle methods, validation helpers, records, and IOException behavior intact." + }, + { + "name": "Centralizes lifecycle preflight", + "category": "maintainability", + "max_score": 18, + "description": "stop, status, and logs share one helper that performs explicit-workflow validation before loading the lifecycle manifest." + }, + { + "name": "Uses Optional terminal at boundary", + "category": "optional_quality", + "max_score": 22, + "description": "Uses explicitWorkflow.ifPresent(...) or an equivalent Optional terminal at the boundary instead of passing Optional to a helper that only inspects presence." 
+ }, + { + "name": "Domain helper accepts Path", + "category": "optional_quality", + "max_score": 14, + "description": "The helper that validates the selected workflow accepts Path, not Optional, so the domain validation is separated from optionality." + }, + { + "name": "Removes isEmpty/get flow", + "category": "optional_quality", + "max_score": 14, + "description": "Does not keep Optional.isEmpty() followed by Optional.get() to validate the explicit workflow selector." + }, + { + "name": "Preserves validation behavior", + "category": "safety", + "max_score": 16, + "description": "Keeps the missing-workflow message, regular-file validation message, validation-before-manifest-load order, and selectForStop/status/logs behavior; simplifying the redundant exists guard is acceptable when observable behavior is unchanged." + }, + { + "name": "Avoids generic Optional utility", + "category": "maintainability", + "max_score": 8, + "description": "Does not introduce broad Optional helpers, fake collections, or unrelated lifecycle redesign to hide the presence check." + } + ], + "metadata": { + "invocation": "natural", + "task_type": "cleanup", + "evidence_type": "ordinary_lift", + "issue": "https://github.com/martinfrancois/java-optionals-skill/issues/67" + } +} diff --git a/evals-reference/56-optional-helper-trigger-lifecycle-refactor/task.md b/evals-reference/56-optional-helper-trigger-lifecycle-refactor/task.md new file mode 100644 index 0000000..473c304 --- /dev/null +++ b/evals-reference/56-optional-helper-trigger-lifecycle-refactor/task.md @@ -0,0 +1,104 @@ +# Refactor lifecycle selector validation + +Refactor this lifecycle selector validation so status, stop, and logs share the same explicit +workflow preflight before loading the manifest. Preserve behavior and public messages. Keep the +change small. Assume Java 17. + +Create `LocalWorkerManager.java` with the revised class. Return the complete revised Java code only. 
+The shared preflight should bind `workflow` at the Optional boundary and call a Path-taking +validation helper; do not keep `Optional.isEmpty()` followed by `Optional.get()` inside the helper. +Do not leave `requireExistingExplicitWorkflow(Optional<Path> workflow)` in place; after the refactor, +the selected workflow validation helper should receive a `Path`, and the code should not call +`workflow.get()`. +Do not duplicate the `workflow.map(...).ifPresent(...)` preflight chain in `stop`, `status`, and +`logs`; extract that Optional-boundary preflight into one shared helper called by all three methods. + +```java +import java.io.IOException; +import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; + +final class LocalWorkerManager { + int stop(StopWorkerRequest request, PrintStream out) throws IOException { + LocalWorkerPaths paths = LocalWorkerPaths.from(request.configDir()); + requireExistingExplicitWorkflow(request.workflow()); + ConnectedBoardManifest manifest = new ConnectedBoardRepository(paths.manifestPath()).loadForLifecycle(); + List<ConnectedBoard> boards = selectForStop(manifest, request.board(), request.workflow()); + return boards.size(); + } + + int status(WorkerStatusRequest request, PrintStream out) throws IOException { + LocalWorkerPaths paths = LocalWorkerPaths.from(request.configDir()); + requireExistingExplicitWorkflow(request.workflow()); + ConnectedBoardManifest manifest = new ConnectedBoardRepository(paths.manifestPath()).loadForLifecycle(); + List<ConnectedBoard> boards = selectForStatus(manifest, request.board(), request.workflow()); + return boards.size(); + } + + int logs(WorkerLogsRequest request, PrintStream out) throws IOException { + LocalWorkerPaths paths = LocalWorkerPaths.from(request.configDir()); + requireExistingExplicitWorkflow(request.workflow()); + ConnectedBoardManifest manifest = new ConnectedBoardRepository(paths.manifestPath()).loadForLifecycle(); + ConnectedBoard board = 
selectOne(manifest, request.board(), request.workflow()); + return board.name().length(); + } + + private static void requireExistingExplicitWorkflow(Optional<Path> workflow) { + if (workflow.isEmpty()) { + return; + } + Path workflowPath = workflow.get().toAbsolutePath().normalize(); + if (!Files.exists(workflowPath)) { + throw new TrelloBoardSetupException( + "setup_invalid_arguments", "--workflow must point to an existing workflow file."); + } + validateWorkerWorkflowPath(workflowPath); + } + + private static void validateWorkerWorkflowPath(Path workflowPath) { + if (Files.exists(workflowPath) && !Files.isRegularFile(workflowPath)) { + throw new TrelloBoardSetupException( + "setup_invalid_arguments", "--workflow must point to a regular workflow file."); + } + } + + private List<ConnectedBoard> selectForStop( + ConnectedBoardManifest manifest, Optional board, Optional<Path> workflow) { + return List.of(); + } + + private List<ConnectedBoard> selectForStatus( + ConnectedBoardManifest manifest, Optional board, Optional<Path> workflow) { + return List.of(); + } + + private ConnectedBoard selectOne(ConnectedBoardManifest manifest, Optional board, Optional<Path> workflow) { + return new ConnectedBoard("demo"); + } + + record StopWorkerRequest(Path configDir, Optional board, Optional<Path> workflow) {} + record WorkerStatusRequest(Path configDir, Optional board, Optional<Path> workflow) {} + record WorkerLogsRequest(Path configDir, Optional board, Optional<Path> workflow) {} + record LocalWorkerPaths(Path manifestPath) { + static LocalWorkerPaths from(Path configDir) { + return new LocalWorkerPaths(configDir.resolve("connected-boards.json")); + } + } + record ConnectedBoard(String name) {} + record ConnectedBoardManifest(List<ConnectedBoard> boards) {} + static final class ConnectedBoardRepository { + ConnectedBoardRepository(Path manifestPath) {} + ConnectedBoardManifest loadForLifecycle() throws IOException { + return new ConnectedBoardManifest(List.of()); + } + } + static final class TrelloBoardSetupException extends RuntimeException { + 
TrelloBoardSetupException(String code, String message) { + super(message); + } + } +} +``` diff --git a/evals-reference/NUMBERING.md b/evals-reference/NUMBERING.md index 7482753..75849cd 100644 --- a/evals-reference/NUMBERING.md +++ b/evals-reference/NUMBERING.md @@ -3,3 +3,9 @@ Reference scenarios keep historical numbers from earlier suites. Gaps are intentional when a case was removed, renamed, promoted to main eval coverage, moved between main eval and reference coverage, or moved to `evals-regression/` after repeated both-variant 100% results. + +Numbers `51` through `56` were added during the July 2026 open-issue sweep. They cover reference +scenarios for presence-to-enum selection, findAny/findFirst Optional terminals, domain selections +with lazy fallback, side-effecting upsert boundaries, ifPresentOrElse rendering branches, and +lifecycle Optional helper boundaries. Keep them in `evals-reference/` until isolated hosted runs +classify each scenario. diff --git a/scripts/run_eval_suite.sh b/scripts/run_eval_suite.sh index c75ebf7..352607e 100755 --- a/scripts/run_eval_suite.sh +++ b/scripts/run_eval_suite.sh @@ -7,16 +7,32 @@ Usage: scripts/run_eval_suite.sh [scenario ...] [-- tessl eval run args...] Runs hosted Tessl evals with the repository's variant policy: - main -> without-context and with-context - reference -> without-context and with-context + main -> baseline control and with-context + reference -> baseline control and with-context regression -> with-context only Examples: scripts/run_eval_suite.sh main -- --label "main check" scripts/run_eval_suite.sh reference 01-display-name -- --label "targeted reference" scripts/run_eval_suite.sh regression -- --label "regression safety" - -Do not pass --variant. This script chooses variants from the suite purpose. 
+ scripts/run_eval_suite.sh main -- --agent claude:claude-sonnet-4-6 --label "representative model check" + +Model-selection note: + Do not pin Sonnet in default commands; the script runs with the current Tessl default solver. + On accounts without model-selection entitlements (including many free plans), passing + `--agent` for a specific model (for example, `claude:claude-sonnet-4-6`) can return + a "Missing required entitlement" error. Prefer default commands for routine checks and + save explicit model pins for accounts where modelSelection is enabled. + If model-selection is available, Sonnet 4.6 or a better model is a good representative check. + See Tessl model-selection and default-model discussions: + - https://docs.tessl.io/changelog + - https://tessl.io/blog/why-were-changing-our-default-eval-model/ + +Do not pass --variant or --skip-baseline. This script chooses variants from the suite purpose. +The default Tessl solver is used unless an explicit --agent is passed after --. +The runner passes --skill java-optionals so with-context runs exercise this skill instead of relying +on solver auto-selection for final readiness evidence. It also passes --force so runs after a skill +or runner fix cannot reuse stale hosted solutions. USAGE } @@ -42,15 +58,15 @@ shift case "$suite" in main) source_dir="evals" - variants=(--variant without-context --variant with-context) + variant_label="baseline control + with-context" ;; reference) source_dir="evals-reference" - variants=(--variant without-context --variant with-context) + variant_label="baseline control + with-context" ;; regression) source_dir="evals-regression" - variants=(--variant with-context) + variant_label="with-context only" ;; -h|--help|help) usage @@ -72,8 +88,8 @@ while [[ $# -gt 0 ]]; do extra_args=("$@") break ;; - --variant|--variant=*) - echo "Do not pass --variant; scripts/run_eval_suite.sh chooses variants by suite." 
>&2 + --variant|--variant=*|--skip-baseline|--skip-baseline=*) + echo "Do not pass --variant or --skip-baseline; scripts/run_eval_suite.sh chooses variants by suite." >&2 exit 2 ;; *) @@ -85,8 +101,8 @@ done for arg in "${extra_args[@]}"; do case "$arg" in - --variant|--variant=*) - echo "Do not pass --variant; scripts/run_eval_suite.sh chooses variants by suite." >&2 + --variant|--variant=*|--skip-baseline|--skip-baseline=*) + echo "Do not pass --variant or --skip-baseline; scripts/run_eval_suite.sh chooses variants by suite." >&2 exit 2 ;; esac @@ -97,36 +113,48 @@ if ! command -v tessl >/dev/null 2>&1; then exit 127 fi +eval_run_help="$(tessl eval run --help 2>&1 || true)" +if grep -q -- "--variant" <<<"$eval_run_help"; then + case "$suite" in + main|reference) + variant_args=(--variant without-context --variant with-context) + ;; + regression) + variant_args=(--variant with-context) + ;; + esac +elif grep -q -- "--skip-baseline" <<<"$eval_run_help"; then + case "$suite" in + main|reference) + variant_args=() + ;; + regression) + variant_args=(--skip-baseline) + ;; + esac +else + echo "Unsupported tessl eval run CLI: expected --variant or --skip-baseline support." >&2 + exit 2 +fi + repo_root="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" source_path="$repo_root/$source_dir" +skill_args=(--skill java-optionals) +freshness_args=(--force) if [[ ! -d "$source_path" ]]; then echo "Missing suite directory: $source_path" >&2 exit 1 fi -has_agent=false -for arg in "${extra_args[@]}"; do - case "$arg" in - --agent|--agent=*) - has_agent=true - ;; - esac -done - -agent_args=() -if [[ "$has_agent" == false ]]; then - agent_args=(--agent claude:claude-sonnet-4-6) -fi - if [[ "$suite" == "main" && "${#scenarios[@]}" -eq 0 ]]; then echo "Running main eval suite from the linked plugin path." 
echo "Scenarios:" print_suite_scenarios "$source_path" - echo "Variants: ${variants[*]}" + echo "Eval mode: $variant_label" ( cd "$repo_root" - tessl eval run "${agent_args[@]}" "${variants[@]}" "${extra_args[@]}" . + tessl eval run "${variant_args[@]}" "${skill_args[@]}" "${freshness_args[@]}" "${extra_args[@]}" . ) exit 0 fi @@ -194,9 +222,9 @@ fi echo "Running $suite eval suite from the linked plugin path with a temporary evals/ staging area." echo "Scenarios:" print_suite_scenarios "$staged_evals" -echo "Variants: ${variants[*]}" +echo "Eval mode: $variant_label" ( cd "$repo_root" - tessl eval run "${agent_args[@]}" "${variants[@]}" "${extra_args[@]}" . + tessl eval run "${variant_args[@]}" "${skill_args[@]}" "${freshness_args[@]}" "${extra_args[@]}" . ) diff --git a/scripts/validate_publish_ready.sh b/scripts/validate_publish_ready.sh index 998a2f7..23a4cb3 100755 --- a/scripts/validate_publish_ready.sh +++ b/scripts/validate_publish_ready.sh @@ -6,5 +6,5 @@ python3 scripts/validate_eval_criteria.py evals evals-reference evals-regression python3 -m py_compile scripts/*.py bash -n scripts/*.sh tessl plugin lint . -tessl skill review --threshold 100 skills/java-optionals/SKILL.md +tessl review run --workspace martinfrancois --threshold 100 skills/java-optionals/SKILL.md tessl plugin publish --dry-run .