diff --git a/skills/company-research/scripts/compile_report.mjs b/skills/company-research/scripts/compile_report.mjs
index 2759c69..b18ee24 100644
--- a/skills/company-research/scripts/compile_report.mjs
+++ b/skills/company-research/scripts/compile_report.mjs
@@ -93,6 +93,20 @@ function escapeHtml(str) {
return (str || '').replace(/&/g, '&').replace(//g, '>').replace(/"/g, '"');
}
+// Return `u` (control characters stripped, then trimmed) when it is safe to
+// use as an href, otherwise null. Only http(s)://, mailto: and scheme-less
+// values (relative paths, `//host`) pass, so the company `website` field
+// can't smuggle a `javascript:` payload into the rendered href.
+function safeUrl(u) {
+ if (!u || typeof u !== 'string') return null;
+ // Strip C0 controls + DEL *before* trimming. The WHATWG URL parser drops
+ // leading controls/spaces and inner tabs/newlines before parsing, so both
+ // `java\tscript:` and `\x01 javascript:` reach the browser as `javascript:`.
+ // Trimming first would leave the space of the latter in front of the scheme
+ // (trim() stops at the control char), dodging the anchored regexes below.
+ const trimmed = u.replace(/[\x00-\x1F\x7F]/g, '').trim();
+ if (/^(\/\/|https?:\/\/|mailto:)/i.test(trimmed)) return trimmed;
+ // Any other explicit scheme (javascript:, data:, vbscript:, ...) is refused.
+ if (/^[a-z][a-z0-9+.-]*:/i.test(trimmed)) return null;
+ return trimmed;
+}
+
function scoreClass(score) {
const s = parseInt(score) || 0;
if (s >= 8) return 'high';
@@ -205,8 +219,9 @@ const tableRows = deduped.map(c => {
const nameHtml = hasDetail
? `${escapeHtml(c.company_name)}`
: escapeHtml(c.company_name);
- const websiteHtml = c.website
- ? `
${escapeHtml(c.website.replace(/^https?:\/\/(www\.)?/, ''))}`
+ const safeWebsite = safeUrl(c.website);
+ const websiteHtml = safeWebsite
+ ? `
${escapeHtml(safeWebsite.replace(/^https?:\/\/(www\.)?/, ''))}`
: '';
return `