fix(review): tolerate unescaped inner quotes in review JSON (#68)

JohnnyVicious · web-flow · commit 09011d2c08db · 2026-04-14T21:00:42.000+02:00
Real-world report on v1.0.10: an adversarial review against PR #412
came back with a `summary` field that embeds a literal
`{"success": false, "error": "..."}` with unescaped inner quotes.
Strict `JSON.parse` dies at the first `"success"`, `tryParseJson`
returns null, and the companion then falls through to printing the
raw JSON text in the Claude Code chat AND embedding that same raw
JSON in the posted PR comment body. Both outputs looked like
garbage.

Replace `tryParseJson` with a schema-aware `tryParseReview` that:

1. Fast path: strict `JSON.parse` (+ ```json fence stripping). Same
   behavior as before for well-formed reviews.
2. Lenient fallback: when strict parse fails, extract each known
   top-level key by name — `verdict` via a closed-vocabulary regex,
   `summary` by slicing between `"summary": "` and the next
   `", "findings"` anchor (or the last `"` before `}` when findings
   is absent), `findings` via a depth-aware bracket walker that
   tracks JSON string state. If the extracted findings array fails
   to parse, return an empty array — better to show verdict +
   summary than nothing.

The new parser is narrowly tailored to our `{verdict, summary,
findings}` schema, not a general-purpose JSON repair library. That
makes it small (~200 lines, zero deps) and easy to reason about.

Delete the old `tryParseJson` helper — its only callers were the
two review handlers, both now use `tryParseReview`.

New test file `tests/review-parser.test.mjs` includes a regression
pinned to the exact broken shape from the real failure report, plus
coverage for the sliceMatchingBracket walker, fence stripping,
malformed-findings recovery, and the no-findings fallback path.
Full suite: 268/268 pass.
diff --git a/plugins/opencode/scripts/lib/review-parser.mjs b/plugins/opencode/scripts/lib/review-parser.mjs
@@ -0,0 +1,218 @@
+// Schema-aware parser for OpenCode review output.
+//
+// The review system prompt asks the model for a JSON object shaped
+// like `{ verdict, summary, findings: [...] }`. Models frequently
+// produce JSON that is structurally valid *except* for a string
+// field (usually `summary` or a finding's `body`) that contains a
+// literal `"` that should have been escaped `\"`.
+//
+// The common failure mode looks like this:
+//
+//     {
+//       "verdict": "approve",
+//       "summary": "... explicit {"success": false, "error": "..."} ...",
+//       "findings": []
+//     }
+//
+// `JSON.parse` bails at the first unescaped `"` inside `summary`, so
+// `structured` comes back null and the companion falls through to
+// printing the raw text. We then render ugly JSON in the chat AND in
+// the posted PR comment — see issue report on v1.0.10.
+//
+// Fix strategy:
+//   1. Try strict `JSON.parse` first (fast path).
+//   2. If that fails, extract each top-level field by anchoring on its
+//      key name. We slice `summary` between `"summary": "` and the
+//      `", "findings": [...]` anchor that comes after it, so embedded
+//      quotes never break the extraction.
+//   3. For `findings`, do a depth-aware bracket walk that tracks
+//      string state, and attempt to parse the extracted array. If
+//      even that fails, return an empty findings array — we'd rather
+//      show verdict + summary than give up entirely.
+//
+// This is NOT a general-purpose JSON repair library. It is narrowly
+// tailored to the `{verdict, summary, findings}` review schema and
+// assumes the model emits the fields in that order (which our prompt
+// template encourages). Anything outside that schema falls through to
+// `null` and the caller treats the output as unstructured.
+
+/**
+ * @typedef {{
+ *   verdict: string,
+ *   summary: string,
+ *   findings: Array<object>,
+ * }} Review
+ */
+
+/**
+ * Try to parse `text` as an OpenCode review. Returns `null` when even
+ * the lenient fallback can't recover the verdict, which is the minimum
+ * the caller needs to render anything useful.
+ *
+ * @param {string} text
+ * @returns {Review|null}
+ */
+export function tryParseReview(text) {
+  if (typeof text !== "string") return null;
+  const candidate = stripCodeFence(text).trim();
+  if (!candidate) return null;
+
+  // Fast path: strict JSON.parse.
+  try {
+    const parsed = JSON.parse(candidate);
+    if (parsed && typeof parsed === "object") {
+      return normalizeReview(parsed);
+    }
+  } catch {
+    // fall through to lenient extraction
+  }
+
+  return lenientExtract(candidate);
+}
+
+/**
+ * Strip a ```json … ``` code fence if present. Returns the inner
+ * content, or the original text when there is no fence.
+ */
+function stripCodeFence(text) {
+  const fenced = /```(?:json)?\s*\n?([\s\S]*?)\n?```/.exec(text);
+  return fenced ? fenced[1] : text;
+}
+
+function normalizeReview(parsed) {
+  const verdict = typeof parsed.verdict === "string" ? parsed.verdict : null;
+  if (!verdict) return null;
+  const summary = typeof parsed.summary === "string" ? parsed.summary : "";
+  const findings = Array.isArray(parsed.findings) ? parsed.findings : [];
+  return { verdict, summary, findings };
+}
+
+// ---------------------------------------------------------------------
+// Lenient extraction
+// ---------------------------------------------------------------------
+
+/**
+ * Schema-aware extractor used when strict JSON.parse fails. Walks
+ * `text` looking for the three known top-level keys by name.
+ *
+ * @param {string} text
+ * @returns {Review|null}
+ */
+function lenientExtract(text) {
+  const verdict = extractVerdict(text);
+  if (!verdict) return null;
+  const summary = extractSummary(text) ?? "";
+  const findings = extractFindings(text);
+  return { verdict, summary, findings };
+}
+
+/**
+ * Verdict values are from a closed vocabulary (`approve` or
+ * `needs-attention`), so a plain regex is safe — there is no way for
+ * a verdict value to itself contain a `"`.
+ */
+function extractVerdict(text) {
+  const m = /"verdict"\s*:\s*"(approve|needs-attention)"/.exec(text);
+  return m ? m[1] : null;
+}
+
+/**
+ * Slice `summary` between `"summary": "` and the next occurrence of
+ * the `", "findings"` anchor. Anything in between — including literal
+ * unescaped `"` characters — is treated as part of the summary. When
+ * the anchor isn't present we fall back to slicing to the last `"`
+ * before the closing `}`, so summaries in malformed responses that
+ * are missing the findings field still come through.
+ */
+function extractSummary(text) {
+  const startKey = /"summary"\s*:\s*"/.exec(text);
+  if (!startKey) return null;
+  const sliceStart = startKey.index + startKey[0].length;
+
+  // Preferred anchor: the `", "findings"` transition. The regex allows
+  // whitespace / newlines between the closing quote and the next key.
+  const endAnchor = /"\s*,\s*"findings"\s*:/g;
+  endAnchor.lastIndex = sliceStart;
+  const endMatch = endAnchor.exec(text);
+  if (endMatch) {
+    return text.substring(sliceStart, endMatch.index);
+  }
+
+  // Fallback: slice to the last `"` before the outermost closing `}`.
+  const lastBrace = text.lastIndexOf("}");
+  const searchEnd = lastBrace > sliceStart ? lastBrace : text.length;
+  const lastQuote = text.lastIndexOf('"', searchEnd);
+  if (lastQuote > sliceStart) {
+    return text.substring(sliceStart, lastQuote);
+  }
+  return null;
+}
+
+/**
+ * Extract the `findings` array. Uses a depth-aware walker that tracks
+ * JSON string state so brackets inside string literals don't confuse
+ * the bracket counter. If the extracted slice fails strict JSON.parse
+ * we return an empty array — we'd rather show the verdict + summary
+ * than nothing at all.
+ */
+function extractFindings(text) {
+  const startKey = /"findings"\s*:\s*\[/.exec(text);
+  if (!startKey) return [];
+  const arrayStart = startKey.index + startKey[0].length - 1; // points at `[`
+  const arrayText = sliceMatchingBracket(text, arrayStart);
+  if (!arrayText) return [];
+  try {
+    const parsed = JSON.parse(arrayText);
+    return Array.isArray(parsed) ? parsed : [];
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Walk `text` starting at `openIdx` (which must point at a `[` or
+ * `{`) and return the substring up to and including the matching
+ * closing bracket, or `null` if no match is found. Tracks JSON string
+ * state so brackets inside strings don't affect the depth counter.
+ *
+ * Exported for tests.
+ *
+ * @param {string} text
+ * @param {number} openIdx
+ * @returns {string|null}
+ */
+export function sliceMatchingBracket(text, openIdx) {
+  const open = text[openIdx];
+  if (open !== "[" && open !== "{") return null;
+  const close = open === "[" ? "]" : "}";
+  let depth = 0;
+  let inString = false;
+  let escape = false;
+  for (let i = openIdx; i < text.length; i += 1) {
+    const ch = text[i];
+    if (escape) {
+      escape = false;
+      continue;
+    }
+    if (ch === "\\") {
+      escape = true;
+      continue;
+    }
+    if (inString) {
+      if (ch === '"') inString = false;
+      continue;
+    }
+    if (ch === '"') {
+      inString = true;
+      continue;
+    }
+    if (ch === open) depth += 1;
+    if (ch === close) {
+      depth -= 1;
+      if (depth === 0) {
+        return text.substring(openIdx, i + 1);
+      }
+    }
+  }
+  return null;
+}
diff --git a/plugins/opencode/scripts/opencode-companion.mjs b/plugins/opencode/scripts/opencode-companion.mjs
@@ -84,6 +84,7 @@ import {
 import { readJson, readDenyRules } from "./lib/fs.mjs";
 import { resolveReviewAgent } from "./lib/review-agent.mjs";
 import { preparePostInstructions, formatPostTrailer } from "./lib/pr-comments.mjs";
+import { tryParseReview } from "./lib/review-parser.mjs";
 import { parseModelString, selectFreeModel } from "./lib/model.mjs";
 import {
   applyDefaultModelOptions,
@@ -381,9 +382,12 @@ async function handleReview(argv) {
 
       report("finalizing", "Processing review output...");
 
-      // Try to parse structured output
+      // Try to parse structured output. `tryParseReview` tolerates the
+      // common LLM failure mode where a string value contains an
+      // unescaped `"` (usually embedded code or JSON literals in the
+      // summary) and strict JSON.parse would otherwise give up.
       const text = extractResponseText(response);
-      let structured = tryParseJson(text);
+      let structured = tryParseReview(text);
       const usedModel = extractResponseModel(response);
 
       return {
@@ -509,8 +513,9 @@ async function handleAdversarialReview(argv) {
 
       report("finalizing", "Processing review output...");
 
+      // See note on `tryParseReview` in handleReview — same reason.
       const text = extractResponseText(response);
-      let structured = tryParseJson(text);
+      let structured = tryParseReview(text);
       const usedModel = extractResponseModel(response);
 
       return {
@@ -1319,20 +1324,3 @@ function extractResponseText(response) {
 
   return JSON.stringify(response, null, 2);
 }
-
-
-/**
- * Try to parse a string as JSON, returning null on failure.
- * @param {string} text
- * @returns {object|null}
- */
-function tryParseJson(text) {
-  // Look for JSON in the text (may be wrapped in markdown code blocks)
-  const jsonMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
-  const candidate = jsonMatch ? jsonMatch[1] : text;
-  try {
-    return JSON.parse(candidate.trim());
-  } catch {
-    return null;
-  }
-}
diff --git a/tests/review-parser.test.mjs b/tests/review-parser.test.mjs