|
| 1 | +// Schema-aware parser for OpenCode review output. |
| 2 | +// |
| 3 | +// The review system prompt asks the model for a JSON object shaped |
| 4 | +// like `{ verdict, summary, findings: [...] }`. Models frequently |
| 5 | +// produce JSON that is structurally valid *except* for a string |
| 6 | +// field (usually `summary` or a finding's `body`) that contains a |
| 7 | +// literal `"` that should have been escaped `\"`. |
| 8 | +// |
| 9 | +// The common failure mode looks like this: |
| 10 | +// |
| 11 | +// { |
| 12 | +// "verdict": "approve", |
| 13 | +// "summary": "... explicit {"success": false, "error": "..."} ...", |
| 14 | +// "findings": [] |
| 15 | +// } |
| 16 | +// |
| 17 | +// `JSON.parse` bails at the first unescaped `"` inside `summary`, so |
| 18 | +// `structured` comes back null and the companion falls through to |
| 19 | +// printing the raw text. We then render ugly JSON in the chat AND in |
| 20 | +// the posted PR comment — see issue report on v1.0.10. |
| 21 | +// |
| 22 | +// Fix strategy: |
| 23 | +// 1. Try strict `JSON.parse` first (fast path). |
| 24 | +// 2. If that fails, extract each top-level field by anchoring on its |
| 25 | +// key name. We slice `summary` between `"summary": "` and the |
| 26 | +// `", "findings": [...]` anchor that comes after it, so embedded |
| 27 | +// quotes never break the extraction. |
| 28 | +// 3. For `findings`, do a depth-aware bracket walk that tracks |
| 29 | +// string state, and attempt to parse the extracted array. If |
| 30 | +// even that fails, return an empty findings array — we'd rather |
| 31 | +// show verdict + summary than give up entirely. |
| 32 | +// |
| 33 | +// This is NOT a general-purpose JSON repair library. It is narrowly |
| 34 | +// tailored to the `{verdict, summary, findings}` review schema and |
| 35 | +// assumes the model emits the fields in that order (which our prompt |
| 36 | +// template encourages). Anything outside that schema falls through to |
| 37 | +// `null` and the caller treats the output as unstructured. |
| 38 | + |
| 39 | +/** |
| 40 | + * @typedef {{ |
| 41 | + * verdict: string, |
| 42 | + * summary: string, |
| 43 | + * findings: Array<object>, |
| 44 | + * }} Review |
| 45 | + */ |
| 46 | + |
| 47 | +/** |
| 48 | + * Try to parse `text` as an OpenCode review. Returns `null` when even |
| 49 | + * the lenient fallback can't recover the verdict, which is the minimum |
| 50 | + * the caller needs to render anything useful. |
| 51 | + * |
| 52 | + * @param {string} text |
| 53 | + * @returns {Review|null} |
| 54 | + */ |
| 55 | +export function tryParseReview(text) { |
| 56 | + if (typeof text !== "string") return null; |
| 57 | + const candidate = stripCodeFence(text).trim(); |
| 58 | + if (!candidate) return null; |
| 59 | + |
| 60 | + // Fast path: strict JSON.parse. |
| 61 | + try { |
| 62 | + const parsed = JSON.parse(candidate); |
| 63 | + if (parsed && typeof parsed === "object") { |
| 64 | + return normalizeReview(parsed); |
| 65 | + } |
| 66 | + } catch { |
| 67 | + // fall through to lenient extraction |
| 68 | + } |
| 69 | + |
| 70 | + return lenientExtract(candidate); |
| 71 | +} |
| 72 | + |
| 73 | +/** |
| 74 | + * Strip a ```json … ``` code fence if present. Returns the inner |
| 75 | + * content, or the original text when there is no fence. |
| 76 | + */ |
| 77 | +function stripCodeFence(text) { |
| 78 | + const fenced = /```(?:json)?\s*\n?([\s\S]*?)\n?```/.exec(text); |
| 79 | + return fenced ? fenced[1] : text; |
| 80 | +} |
| 81 | + |
| 82 | +function normalizeReview(parsed) { |
| 83 | + const verdict = typeof parsed.verdict === "string" ? parsed.verdict : null; |
| 84 | + if (!verdict) return null; |
| 85 | + const summary = typeof parsed.summary === "string" ? parsed.summary : ""; |
| 86 | + const findings = Array.isArray(parsed.findings) ? parsed.findings : []; |
| 87 | + return { verdict, summary, findings }; |
| 88 | +} |
| 89 | + |
| 90 | +// --------------------------------------------------------------------- |
| 91 | +// Lenient extraction |
| 92 | +// --------------------------------------------------------------------- |
| 93 | + |
| 94 | +/** |
| 95 | + * Schema-aware extractor used when strict JSON.parse fails. Walks |
| 96 | + * `text` looking for the three known top-level keys by name. |
| 97 | + * |
| 98 | + * @param {string} text |
| 99 | + * @returns {Review|null} |
| 100 | + */ |
| 101 | +function lenientExtract(text) { |
| 102 | + const verdict = extractVerdict(text); |
| 103 | + if (!verdict) return null; |
| 104 | + const summary = extractSummary(text) ?? ""; |
| 105 | + const findings = extractFindings(text); |
| 106 | + return { verdict, summary, findings }; |
| 107 | +} |
| 108 | + |
| 109 | +/** |
| 110 | + * Verdict values are from a closed vocabulary (`approve` or |
| 111 | + * `needs-attention`), so a plain regex is safe — there is no way for |
| 112 | + * a verdict value to itself contain a `"`. |
| 113 | + */ |
| 114 | +function extractVerdict(text) { |
| 115 | + const m = /"verdict"\s*:\s*"(approve|needs-attention)"/.exec(text); |
| 116 | + return m ? m[1] : null; |
| 117 | +} |
| 118 | + |
| 119 | +/** |
| 120 | + * Slice `summary` between `"summary": "` and the next occurrence of |
| 121 | + * the `", "findings"` anchor. Anything in between — including literal |
| 122 | + * unescaped `"` characters — is treated as part of the summary. When |
| 123 | + * the anchor isn't present we fall back to slicing to the last `"` |
| 124 | + * before the closing `}`, so summaries in malformed responses that |
| 125 | + * are missing the findings field still come through. |
| 126 | + */ |
| 127 | +function extractSummary(text) { |
| 128 | + const startKey = /"summary"\s*:\s*"/.exec(text); |
| 129 | + if (!startKey) return null; |
| 130 | + const sliceStart = startKey.index + startKey[0].length; |
| 131 | + |
| 132 | + // Preferred anchor: the `", "findings"` transition. The regex allows |
| 133 | + // whitespace / newlines between the closing quote and the next key. |
| 134 | + const endAnchor = /"\s*,\s*"findings"\s*:/g; |
| 135 | + endAnchor.lastIndex = sliceStart; |
| 136 | + const endMatch = endAnchor.exec(text); |
| 137 | + if (endMatch) { |
| 138 | + return text.substring(sliceStart, endMatch.index); |
| 139 | + } |
| 140 | + |
| 141 | + // Fallback: slice to the last `"` before the outermost closing `}`. |
| 142 | + const lastBrace = text.lastIndexOf("}"); |
| 143 | + const searchEnd = lastBrace > sliceStart ? lastBrace : text.length; |
| 144 | + const lastQuote = text.lastIndexOf('"', searchEnd); |
| 145 | + if (lastQuote > sliceStart) { |
| 146 | + return text.substring(sliceStart, lastQuote); |
| 147 | + } |
| 148 | + return null; |
| 149 | +} |
| 150 | + |
| 151 | +/** |
| 152 | + * Extract the `findings` array. Uses a depth-aware walker that tracks |
| 153 | + * JSON string state so brackets inside string literals don't confuse |
| 154 | + * the bracket counter. If the extracted slice fails strict JSON.parse |
| 155 | + * we return an empty array — we'd rather show the verdict + summary |
| 156 | + * than nothing at all. |
| 157 | + */ |
| 158 | +function extractFindings(text) { |
| 159 | + const startKey = /"findings"\s*:\s*\[/.exec(text); |
| 160 | + if (!startKey) return []; |
| 161 | + const arrayStart = startKey.index + startKey[0].length - 1; // points at `[` |
| 162 | + const arrayText = sliceMatchingBracket(text, arrayStart); |
| 163 | + if (!arrayText) return []; |
| 164 | + try { |
| 165 | + const parsed = JSON.parse(arrayText); |
| 166 | + return Array.isArray(parsed) ? parsed : []; |
| 167 | + } catch { |
| 168 | + return []; |
| 169 | + } |
| 170 | +} |
| 171 | + |
| 172 | +/** |
| 173 | + * Walk `text` starting at `openIdx` (which must point at a `[` or |
| 174 | + * `{`) and return the substring up to and including the matching |
| 175 | + * closing bracket, or `null` if no match is found. Tracks JSON string |
| 176 | + * state so brackets inside strings don't affect the depth counter. |
| 177 | + * |
| 178 | + * Exported for tests. |
| 179 | + * |
| 180 | + * @param {string} text |
| 181 | + * @param {number} openIdx |
| 182 | + * @returns {string|null} |
| 183 | + */ |
| 184 | +export function sliceMatchingBracket(text, openIdx) { |
| 185 | + const open = text[openIdx]; |
| 186 | + if (open !== "[" && open !== "{") return null; |
| 187 | + const close = open === "[" ? "]" : "}"; |
| 188 | + let depth = 0; |
| 189 | + let inString = false; |
| 190 | + let escape = false; |
| 191 | + for (let i = openIdx; i < text.length; i += 1) { |
| 192 | + const ch = text[i]; |
| 193 | + if (escape) { |
| 194 | + escape = false; |
| 195 | + continue; |
| 196 | + } |
| 197 | + if (ch === "\\") { |
| 198 | + escape = true; |
| 199 | + continue; |
| 200 | + } |
| 201 | + if (inString) { |
| 202 | + if (ch === '"') inString = false; |
| 203 | + continue; |
| 204 | + } |
| 205 | + if (ch === '"') { |
| 206 | + inString = true; |
| 207 | + continue; |
| 208 | + } |
| 209 | + if (ch === open) depth += 1; |
| 210 | + if (ch === close) { |
| 211 | + depth -= 1; |
| 212 | + if (depth === 0) { |
| 213 | + return text.substring(openIdx, i + 1); |
| 214 | + } |
| 215 | + } |
| 216 | + } |
| 217 | + return null; |
| 218 | +} |
0 commit comments