-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreview-parser.mjs
More file actions
218 lines (204 loc) · 7.07 KB
/
review-parser.mjs
File metadata and controls
218 lines (204 loc) · 7.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
// Schema-aware parser for OpenCode review output.
//
// The review system prompt asks the model for a JSON object shaped
// like `{ verdict, summary, findings: [...] }`. Models frequently
// produce JSON that is structurally valid *except* for a string
// field (usually `summary` or a finding's `body`) that contains a
// literal `"` that should have been escaped `\"`.
//
// The common failure mode looks like this:
//
// {
// "verdict": "approve",
// "summary": "... explicit {"success": false, "error": "..."} ...",
// "findings": []
// }
//
// `JSON.parse` bails at the first unescaped `"` inside `summary`, so
// `structured` comes back null and the companion falls through to
// printing the raw text. We then render ugly JSON in the chat AND in
// the posted PR comment — see issue report on v1.0.10.
//
// Fix strategy:
// 1. Try strict `JSON.parse` first (fast path).
// 2. If that fails, extract each top-level field by anchoring on its
// key name. We slice `summary` between `"summary": "` and the
// `", "findings": [...]` anchor that comes after it, so embedded
// quotes never break the extraction.
// 3. For `findings`, do a depth-aware bracket walk that tracks
// string state, and attempt to parse the extracted array. If
// even that fails, return an empty findings array — we'd rather
// show verdict + summary than give up entirely.
//
// This is NOT a general-purpose JSON repair library. It is narrowly
// tailored to the `{verdict, summary, findings}` review schema and
// assumes the model emits the fields in that order (which our prompt
// template encourages). Anything outside that schema falls through to
// `null` and the caller treats the output as unstructured.
/**
* @typedef {{
* verdict: string,
* summary: string,
* findings: Array<object>,
* }} Review
*/
/**
 * Try to parse `text` as an OpenCode review.
 *
 * Two candidates are attempted, in order:
 *   1. the text with a surrounding code fence stripped, and
 *   2. the untouched (trimmed) text, when it differs from (1).
 *
 * Fence stripping is a heuristic and may select a fence that is really
 * part of a string value (for example a summary that quotes a code
 * snippet). Retrying on the untouched text keeps such a review from
 * being rejected just because the stripped slice was garbage.
 *
 * Each candidate goes through strict `JSON.parse` first and the lenient
 * schema-aware extractor second. Returns `null` when no candidate yields
 * a verdict, which is the minimum the caller needs to render anything
 * useful.
 *
 * @param {string} text
 * @returns {Review|null}
 */
export function tryParseReview(text) {
  if (typeof text !== "string") return null;
  const raw = text.trim();
  if (!raw) return null;
  const unfenced = stripCodeFence(text).trim();
  // The unfenced candidate goes first so every input that parsed before
  // this fallback existed still produces the same result.
  const candidates = unfenced === raw ? [raw] : [unfenced, raw];
  for (const candidate of candidates) {
    if (!candidate) continue;
    const review = parseReviewCandidate(candidate);
    if (review) return review;
  }
  return null;
}

/**
 * Parse a single candidate string: strict `JSON.parse` first, lenient
 * extraction when the text is not valid JSON or is not a JSON object.
 *
 * @param {string} candidate
 * @returns {Review|null}
 */
function parseReviewCandidate(candidate) {
  try {
    const parsed = JSON.parse(candidate);
    if (parsed && typeof parsed === "object") {
      return normalizeReview(parsed);
    }
  } catch {
    // Not strict JSON: fall through to lenient extraction.
  }
  return lenientExtract(candidate);
}
/**
 * Strip a ```json … ``` code fence if present. Returns the inner
 * content, or the original text when there is no fence.
 *
 * Resolution order:
 *   1. Text that already starts with `{` is a bare JSON object; any
 *      ``` inside it belongs to a string value, so the text is returned
 *      unchanged.
 *   2. A fence whose opening and closing markers each sit on their own
 *      line. Anchoring on line boundaries means a ``` embedded in a
 *      single-line JSON string is not mistaken for the closing marker.
 *   3. The first inline ``` … ``` pair (for example a one-line
 *      "```json {...} ```").
 *
 * @param {string} text
 * @returns {string}
 */
function stripCodeFence(text) {
  if (/^\s*\{/.test(text)) return text;

  const ownLineFence =
    /^[ \t]*```[\w+-]*[ \t]*\r?\n([\s\S]*?)\r?\n[ \t]*```[ \t]*$/m.exec(text);
  if (ownLineFence) return ownLineFence[1];

  const inlineFence = /```(?:json)?\s*\n?([\s\S]*?)\n?```/.exec(text);
  return inlineFence ? inlineFence[1] : text;
}
/**
 * Coerce a strictly-parsed object into the `Review` shape.
 *
 * A non-empty string `verdict` is mandatory; without it the result is
 * `null`. A `summary` that is not a string becomes `""` and a `findings`
 * value that is not an array becomes `[]`.
 *
 * @param {object} parsed
 * @returns {Review|null}
 */
function normalizeReview(parsed) {
  const rawVerdict = parsed.verdict;
  if (typeof rawVerdict !== "string" || rawVerdict.length === 0) {
    return null;
  }

  const rawSummary = parsed.summary;
  const rawFindings = parsed.findings;
  return {
    verdict: rawVerdict,
    summary: typeof rawSummary === "string" ? rawSummary : "",
    findings: Array.isArray(rawFindings) ? rawFindings : [],
  };
}
// ---------------------------------------------------------------------
// Lenient extraction
// ---------------------------------------------------------------------
/**
 * Fallback extractor for text that strict `JSON.parse` rejected. Pulls
 * each of the three known top-level fields out by key name.
 *
 * The verdict is required: when it cannot be found the whole result is
 * `null`. A missing summary becomes `""`; findings are whatever
 * `extractFindings` returns.
 *
 * @param {string} text
 * @returns {Review|null}
 */
function lenientExtract(text) {
  const verdict = extractVerdict(text);
  return verdict
    ? {
        verdict,
        summary: extractSummary(text) ?? "",
        findings: extractFindings(text),
      }
    : null;
}
/**
 * Find the verdict with a plain regex. Only the two values of the closed
 * vocabulary (`approve`, `needs-attention`) are accepted, and neither
 * can contain a `"`, so no quote-aware scanning is needed here.
 *
 * @param {string} text
 * @returns {string|null} the verdict, or `null` when no recognised
 *   verdict is present
 */
function extractVerdict(text) {
  const verdictPattern = /"verdict"\s*:\s*"(approve|needs-attention)"/;
  const [, verdict] = verdictPattern.exec(text) ?? [];
  return verdict ?? null;
}
/**
 * Slice `summary` between `"summary": "` and the next occurrence of
 * the `", "findings"` anchor. Anything in between — including literal
 * unescaped `"` characters — is treated as part of the summary. When
 * the anchor isn't present we fall back to slicing to the last `"`
 * before the closing `}`, so summaries in malformed responses that
 * are missing the findings field still come through.
 *
 * The slice is raw JSON source text, so it is decoded before being
 * returned: valid escape sequences (`\n`, `\"`, `\uXXXX`, …) become the
 * characters they stand for, matching what the strict `JSON.parse` path
 * yields. If the slice cannot be decoded it is returned verbatim.
 *
 * @param {string} text
 * @returns {string|null} the summary, or `null` when no summary could
 *   be located
 */
function extractSummary(text) {
  const startKey = /"summary"\s*:\s*"/.exec(text);
  if (!startKey) return null;
  const sliceStart = startKey.index + startKey[0].length;

  // Preferred anchor: the `", "findings"` transition. The regex allows
  // whitespace / newlines between the closing quote and the next key.
  const endAnchor = /"\s*,\s*"findings"\s*:/g;
  endAnchor.lastIndex = sliceStart;
  const endMatch = endAnchor.exec(text);
  if (endMatch) {
    return decodeJsonStringBody(text.substring(sliceStart, endMatch.index));
  }

  // Fallback: slice to the last `"` before the outermost closing `}`.
  const lastBrace = text.lastIndexOf("}");
  const searchEnd = lastBrace > sliceStart ? lastBrace : text.length;
  const lastQuote = text.lastIndexOf('"', searchEnd);
  if (lastQuote > sliceStart) {
    return decodeJsonStringBody(text.substring(sliceStart, lastQuote));
  }
  return null;
}

/**
 * Best-effort decode of the inside of a (possibly malformed) JSON string
 * literal. Bare `"` and raw control characters — the two things that
 * make the literal invalid — are escaped first, then `JSON.parse` does
 * the actual decoding. Existing backslash pairs are left untouched.
 *
 * @param {string} raw text between the opening and closing quote
 * @returns {string} decoded text, or `raw` unchanged when it still
 *   isn't a valid string body (for example an unknown escape like `\q`)
 */
function decodeJsonStringBody(raw) {
  const repaired = raw.replace(/\\[\s\S]|["\u0000-\u001f]/g, (token) => {
    if (token.length === 2) return token; // already an escape pair
    if (token === '"') return '\\"';
    return JSON.stringify(token).slice(1, -1); // raw control character
  });
  try {
    return JSON.parse(`"${repaired}"`);
  } catch {
    return raw;
  }
}
/**
 * Pull the `findings` array out of `text`.
 *
 * Locates the `"findings": [` key, hands the position of the opening
 * `[` to `sliceMatchingBracket` to get the full array text, and parses
 * that text with strict `JSON.parse`. Every failure along the way — key
 * missing, no matching bracket, parse error, parsed value not an array —
 * yields an empty array, so the caller can still show verdict and
 * summary.
 *
 * @param {string} text
 * @returns {Array<object>}
 */
function extractFindings(text) {
  const keyMatch = /"findings"\s*:\s*\[/.exec(text);
  if (keyMatch === null) return [];

  // The pattern ends with `[`, so the last matched character is the opener.
  const bracketIdx = keyMatch.index + keyMatch[0].length - 1;
  const slice = sliceMatchingBracket(text, bracketIdx);
  if (!slice) return [];

  let decoded;
  try {
    decoded = JSON.parse(slice);
  } catch {
    return [];
  }
  return Array.isArray(decoded) ? decoded : [];
}
/**
 * Return the bracketed span of `text` that begins at `openIdx`.
 *
 * `openIdx` must point at `[` or `{`; otherwise the result is `null`.
 * The scan counts only brackets of the same kind as the opener and
 * ignores any that appear between double quotes. A backslash causes the
 * following character to be skipped, whether or not the scan is
 * currently inside a string. The span runs up to and including the
 * bracket that brings the nesting count back to zero; `null` is
 * returned when the text ends first.
 *
 * Exported for tests.
 *
 * @param {string} text
 * @param {number} openIdx
 * @returns {string|null}
 */
export function sliceMatchingBracket(text, openIdx) {
  const opener = text[openIdx];
  let closer;
  if (opener === "[") {
    closer = "]";
  } else if (opener === "{") {
    closer = "}";
  } else {
    return null;
  }

  let nesting = 0;
  let quoted = false;
  let skipNext = false;
  let pos = openIdx;
  while (pos < text.length) {
    const ch = text[pos];
    pos += 1; // from here on `pos` is one past the current character

    if (skipNext) {
      skipNext = false;
    } else if (ch === "\\") {
      skipNext = true;
    } else if (ch === '"') {
      quoted = !quoted;
    } else if (!quoted) {
      if (ch === opener) {
        nesting += 1;
      } else if (ch === closer) {
        nesting -= 1;
        if (nesting === 0) {
          return text.slice(openIdx, pos);
        }
      }
    }
  }
  return null;
}