/**
 * Collects URL fingerprints (`spec_url` and `mdn_url`) for each feature.
 * Features without any URL are still recorded, with an empty set, so that the
 * token-based fallback matcher in `detectMoves` can see them.
 *
 * Relies on `walk` and `toArray`, which are defined outside this block; the
 * code below only assumes that `walk(undefined, contents)` yields objects
 * with `path` and `compat` properties.
 * @param {*} contents the merged data tree.
 * @returns {Map<string, Set<string>>} map from feature path to its URL
 *   fingerprints (`spec:<url>` / `mdn:<url>`), possibly empty.
 */
const collectFeatures = (contents) => {
  /** @type {Map<string, Set<string>>} */
  const features = new Map();
  for (const { path, compat } of walk(undefined, contents)) {
    /** @type {Set<string>} */
    const urls = new Set();
    if (compat.spec_url) {
      for (const url of toArray(compat.spec_url)) {
        urls.add(`spec:${url}`);
      }
    }
    if (compat.mdn_url) {
      urls.add(`mdn:${compat.mdn_url}`);
    }
    features.set(path, urls);
  }
  return features;
};

/**
 * Tokenizes the leaf (last dot-separated segment) of a feature path into
 * lowercase words. Words are split on `_`, `.` and camelCase boundaries;
 * runs of capitals are kept together as one word (`HTMLElement` becomes
 * `html`, `element`) so that acronyms do not degrade into single-letter
 * tokens that match unrelated names.
 * @param {string} path the feature path.
 * @returns {Set<string>} the leaf tokens; a Set, so each word counts once
 *   per feature.
 */
const tokenizeLeaf = (path) => {
  const leaf = path.split('.').pop() ?? '';
  return new Set(
    leaf
      // Boundaries: separator run | lower/digit → upper | acronym → Word.
      .split(/[_.]+|(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/)
      .filter(Boolean)
      .map((word) => word.toLowerCase()),
  );
};

/**
 * Reads the value at a dot-separated path within a tree. Only own properties
 * are followed, so inherited members such as `toString` or `constructor` are
 * never mistaken for tree nodes.
 * @param {*} root the root object.
 * @param {string} path dot-separated path.
 * @returns {*} the value, or undefined if any segment is missing.
 */
const getAt = (root, path) => {
  let node = root;
  for (const part of path.split('.')) {
    if (
      typeof node !== 'object' ||
      node === null ||
      !Object.hasOwn(node, part)
    ) {
      return undefined;
    }
    node = node[part];
  }
  return node;
};

/**
 * Writes a value at a dot-separated path within a tree, creating intermediate
 * plain objects as needed. An intermediate segment is replaced by a fresh
 * object when it is missing, is not an object, or is only inherited from the
 * prototype chain; the write therefore never descends into a shared
 * prototype object.
 * @param {*} root the root object (mutated).
 * @param {string} path dot-separated path.
 * @param {*} value the value to set.
 * @returns {void}
 */
const setAt = (root, path, value) => {
  const parts = path.split('.');
  // `split` always returns at least one element, so `leaf` is a string.
  const leaf = /** @type {string} */ (parts.pop());
  let node = root;
  for (const part of parts) {
    const child = Object.hasOwn(node, part) ? node[part] : undefined;
    if (typeof child !== 'object' || child === null) {
      node[part] = {};
    }
    node = node[part];
  }
  node[leaf] = value;
};

/**
 * Relocates each move's `__compat` block from its source path to its
 * destination path within the base tree. After projection, a diff of the
 * base tree against head sees the old values at the new path, so a pure
 * rename produces no add/remove noise.
 *
 * Moves whose source has no `__compat` are ignored. If the destination node
 * already exists, only its `__compat` is replaced; otherwise the node is
 * created.
 * @param {*} baseContents the base data tree (mutated).
 * @param {Map<string, string>} moves source → destination paths.
 * @returns {void}
 */
const projectMoves = (baseContents, moves) => {
  for (const [from, to] of moves) {
    const source = getAt(baseContents, from);
    if (!source || typeof source !== 'object' || !source.__compat) {
      continue;
    }
    const dest = getAt(baseContents, to);
    if (dest && typeof dest === 'object') {
      dest.__compat = source.__compat;
    } else {
      setAt(baseContents, to, { __compat: source.__compat });
    }
    delete source.__compat;
  }
};

/**
 * Counts how many leading segments two split paths have in common, looking
 * at no more than `limit` segments.
 * @param {string[]} a first path, split on `.`.
 * @param {string[]} b second path, split on `.`.
 * @param {number} limit maximum number of segments to compare; must not
 *   exceed the length of either array.
 * @returns {number} the number of shared leading segments.
 */
const sharedPrefixLength = (a, b, limit) => {
  let count = 0;
  while (count < limit && a[count] === b[count]) {
    count++;
  }
  return count;
};

/**
 * Tokenizes the leaf of every path and counts, per token, how many of the
 * paths contain it.
 * @param {string[]} paths feature paths.
 * @returns {{tokens: Map<string, Set<string>>, freq: Map<string, number>}}
 *   `tokens` maps each path to its leaf tokens; `freq` maps each token to
 *   the number of paths containing it.
 */
const indexLeafTokens = (paths) => {
  /** @type {Map<string, Set<string>>} */
  const tokens = new Map();
  /** @type {Map<string, number>} */
  const freq = new Map();
  for (const path of paths) {
    const leafTokens = tokenizeLeaf(path);
    tokens.set(path, leafTokens);
    for (const token of leafTokens) {
      freq.set(token, (freq.get(token) ?? 0) + 1);
    }
  }
  return { tokens, freq };
};

/**
 * Detects features that were moved (renamed) in two passes:
 *  1. Match by shared spec_url/mdn_url, with longest-shared-path-prefix as
 *     tiebreaker when multiple candidates share a URL.
 *  2. For features still unmatched, match by common ancestor path plus
 *     shared non-scaffold leaf words (`keepalive`, `signal`, etc.).
 *     Scaffold tokens are those that appear in more than one, and in more
 *     than half, of the unmatched removed or added features (e.g. `init`,
 *     `parameter`); they are ignored.
 *
 * Every destination is claimed by at most one source, in both passes. A
 * removed feature whose URL candidates have all been claimed already falls
 * through to pass 2 instead of sharing a destination.
 * @param {*} baseContents the merged base data tree.
 * @param {*} headContents the merged head data tree.
 * @returns {Map<string, string>} map from removed path to added path.
 */
const detectMoves = (baseContents, headContents) => {
  const baseFeatures = collectFeatures(baseContents);
  const headFeatures = collectFeatures(headContents);

  // Index the features that exist only in head by each of their URLs.
  /** @type {Map<string, string[]>} */
  const addedByUrl = new Map();
  for (const [path, urls] of headFeatures) {
    if (baseFeatures.has(path)) {
      continue;
    }
    for (const url of urls) {
      const list = addedByUrl.get(url) ?? [];
      list.push(path);
      addedByUrl.set(url, list);
    }
  }

  /** @type {Map<string, string>} */
  const moves = new Map();
  /** @type {Set<string>} */
  const matchedDests = new Set();

  // Pass 1: URL fingerprint matching.
  for (const [removedPath, urls] of baseFeatures) {
    if (headFeatures.has(removedPath) || urls.size === 0) {
      continue;
    }
    /** @type {Set<string>} */
    const candidates = new Set();
    for (const url of urls) {
      for (const candidate of addedByUrl.get(url) ?? []) {
        // A destination already claimed by an earlier source must not be
        // reused: projecting two sources onto one destination would
        // overwrite one of them and hide it from the output entirely.
        if (!matchedDests.has(candidate)) {
          candidates.add(candidate);
        }
      }
    }
    if (candidates.size === 0) {
      continue;
    }

    const removedParts = removedPath.split('.');
    let best = '';
    let bestScore = -1;
    for (const candidate of candidates) {
      const candidateParts = candidate.split('.');
      const score = sharedPrefixLength(
        removedParts,
        candidateParts,
        Math.min(removedParts.length, candidateParts.length),
      );
      // Strict comparison: on a tie the first candidate wins.
      if (score > bestScore) {
        best = candidate;
        bestScore = score;
      }
    }
    moves.set(removedPath, best);
    matchedDests.add(best);
  }

  // Pass 2: token + common-ancestor matching for the rest.
  const unmatchedRemoved = [...baseFeatures.keys()].filter(
    (p) => !headFeatures.has(p) && !moves.has(p),
  );
  const unmatchedAdded = [...headFeatures.keys()].filter(
    (p) => !baseFeatures.has(p) && !matchedDests.has(p),
  );
  if (unmatchedRemoved.length === 0 || unmatchedAdded.length === 0) {
    return moves;
  }

  const removed = indexLeafTokens(unmatchedRemoved);
  const added = indexLeafTokens(unmatchedAdded);

  /**
   * @param {number} count number of features containing the token.
   * @param {number} population number of features on that side.
   * @returns {boolean} true if the token is shared too widely on that side.
   */
  const isCommon = (count, population) =>
    // `count > 1`: with a single feature on one side every token would
    // trivially exceed "half", which would disable pass 2 for a lone rename.
    count > 1 && count > population / 2;

  /**
   * @param {string} token the token to test.
   * @returns {boolean} true if the token is too common to be distinctive.
   */
  const isScaffold = (token) =>
    isCommon(removed.freq.get(token) ?? 0, unmatchedRemoved.length) ||
    isCommon(added.freq.get(token) ?? 0, unmatchedAdded.length);

  for (const removedPath of unmatchedRemoved) {
    const rTokens = /** @type {Set<string>} */ (
      removed.tokens.get(removedPath)
    );
    const rParts = removedPath.split('.');
    let best = '';
    let bestScore = -1;

    for (const addedPath of unmatchedAdded) {
      if (matchedDests.has(addedPath)) {
        continue;
      }
      const aTokens = /** @type {Set<string>} */ (added.tokens.get(addedPath));
      const aParts = addedPath.split('.');

      // Shared ancestors only: the leaf segment of each path is excluded.
      const ancestor = sharedPrefixLength(
        rParts,
        aParts,
        Math.min(rParts.length, aParts.length) - 1,
      );
      if (ancestor === 0) {
        continue;
      }

      let tokenScore = 0;
      for (const token of rTokens) {
        if (aTokens.has(token) && !isScaffold(token)) {
          // The token occurs on both sides, so the sum is at least 2.
          // Rarer tokens contribute more to the score.
          const freq =
            (removed.freq.get(token) ?? 0) + (added.freq.get(token) ?? 0);
          tokenScore += 1 / freq;
        }
      }
      if (tokenScore === 0) {
        continue;
      }

      // Ancestor depth dominates; the token score only breaks ties.
      const score = ancestor * 1000 + tokenScore;
      if (score > bestScore) {
        best = addedPath;
        bestScore = score;
      }
    }

    if (best) {
      moves.set(removedPath, best);
      matchedDests.add(best);
    }
  }

  return moves;
};

/**
 * Formats a moved feature path as an inline diff: chunks present only in the
 * destination (head) are styled green, chunks present only in the source
 * (base) are styled red, and shared chunks are left unstyled. Paths are
 * compared token by token, where words and the `.`/`_` separators are each
 * their own token, so partial-word overlaps like `er` in `parameter` and
 * `referrer` are never matched.
 *
 * Relies on `diffArrays` and `styleText`, which are defined outside this
 * block.
 * @param {string} from the source path.
 * @param {string} to the destination path.
 * @param {object} options Options
 * @param {Format} options.format Whether to return HTML, otherwise plaintext.
 * @returns {string} the formatted move string.
 */
const formatMove = (from, to, options) => {
  /**
   * Tokenizes a path into words and separators (`.`/`_`) so each can be
   * matched independently by the diff.
   * @param {string} s the path to tokenize.
   * @returns {string[]} interleaved word and separator tokens.
   */
  const tokenize = (s) => s.split(/([._])/);
  const isHtml = options.format === 'html';
  // NOTE(review): the HTML templates below contain no markup in the text
  // under review, so both HTML branches return the bare value; confirm
  // against the original file whether wrapping tags are expected here.
  return diffArrays(tokenize(to), tokenize(from))
    .map((part) => {
      // Note: removed/added is deliberately inverted here, to have additions
      // first — matching the convention used for value diffs.
      const value = part.value.join('');
      if (part.removed) {
        return isHtml ? `${value}` : styleText('green', value);
      }
      if (part.added) {
        return isHtml ? `${value}` : styleText('red', value);
      }
      return value;
    })
    .join('');
};

${prefix}

` : `${prefix}`; - let lastKey = ''; - - for (const key of keys) { - const baseValue = JSON.stringify(baseData[key] ?? null); - const headValue = JSON.stringify(headData[key] ?? null); - if (baseValue === headValue) { - continue; - } - if (!lastKey) { - lastKey = key; - } - const keyDiff = diffKeys( - key.slice(prefix.length), - lastKey.slice(prefix.length), - options, - ); - + /** + * Renders a colored inline diff between two stringified field values, + * matching the convention used elsewhere: green for additions in head, red + * for removals from base. Returns an empty string when the diff would be + * empty (e.g. null → "mirror" / "false"). + * @param {string} baseValue stringified base value (or `"null"`). + * @param {string} headValue stringified head value (or `"null"`). + * @returns {string} the colored diff string. + */ + const formatValueDiff = (baseValue, headValue) => { const splitRegexp = /(?<=^")|(?<=[\],/ ])|(?=[[,/ ])|(?="$)|(?<=\d)(?=−)|(?<=−)(?=\d)|(?=#)/; let headValueForDiff = headValue; @@ -379,19 +703,19 @@ const printDiffs = (base, head, options) => { if (baseValue == 'null') { baseValueForDiff = ''; if (headValue == '"mirror"' || headValue == '"false"') { - // Ignore initial "mirror"/"false" values. headValueForDiff = ''; } } else if (headValue == 'null') { headValueForDiff = ''; } - const valueDiff = diffArrays( + return diffArrays( headValueForDiff.split(splitRegexp), baseValueForDiff.split(splitRegexp), ) .map((part) => { - // Note: removed/added is deliberately inversed here, to have additions first. + // Note: removed/added is deliberately inverted here, to have + // additions first. const value = part.value.join(''); if (part.removed) { return options.format == 'html' @@ -402,11 +726,57 @@ const printDiffs = (base, head, options) => { ? 
`${value}` : styleText('red', value); } - return value; }) .join(''); + }; + + /** @type {Set} */ + const consumedKeys = new Set(); + for (const [, to] of moves) { + consumedKeys.add(`${to}.__compat.description`); + } + for (const path of [...addedFeatures, ...removedFeatures]) { + consumedKeys.add(`${path}.__compat.description`); + } + + /** + * Returns the colored description diff at a feature path, or empty if + * unchanged. + * @param {string} path the feature path. + * @returns {string} the colored description diff (or empty). + */ + const featureDescriptionDiff = (path) => { + const key = `${path}.__compat.description`; + const baseValue = JSON.stringify(baseData[key] ?? null); + const headValue = JSON.stringify(headData[key] ?? null); + if (baseValue === headValue) { + return ''; + } + return formatValueDiff(baseValue, headValue); + }; + + let lastKey = ''; + + for (const key of keys) { + if (consumedKeys.has(key)) { + continue; + } + const baseValue = JSON.stringify(baseData[key] ?? null); + const headValue = JSON.stringify(headData[key] ?? null); + if (baseValue === headValue) { + continue; + } + if (!lastKey) { + lastKey = key; + } + const keyDiff = diffKeys( + key.slice(prefix.length), + lastKey.slice(prefix.length), + options, + ); + const valueDiff = formatValueDiff(baseValue, headValue); const value = valueDiff; if (!value.length) { @@ -447,7 +817,12 @@ const printDiffs = (base, head, options) => { lastKey = key; } - if (groups.size === 0) { + if ( + groups.size === 0 && + !addedFeatures.length && + !removedFeatures.length && + !moves.size + ) { console.log('✔ No changes.'); return; } @@ -532,6 +907,105 @@ const printDiffs = (base, head, options) => { } }; + /** + * @typedef {object} ListingItem + * @property {string} section section header. + * @property {string} rendered styled key (path or move). + * @property {number} visibleLen visible length of `rendered` (no styling). + * @property {string} desc styled description diff (or empty). 
+ */ + + /** @type {ListingItem[]} */ + const listingItems = []; + for (const path of addedFeatures) { + const lastDot = path.lastIndexOf('.'); + const parent = lastDot === -1 ? '' : path.slice(0, lastDot + 1); + const leaf = lastDot === -1 ? path : path.slice(lastDot + 1); + const styledLeaf = + options.format === 'html' + ? `${leaf}` + : styleText('green', leaf); + listingItems.push({ + section: 'New features', + rendered: `${parent}${styledLeaf}`, + visibleLen: path.length, + desc: featureDescriptionDiff(path), + }); + } + for (const path of removedFeatures) { + const lastDot = path.lastIndexOf('.'); + const parent = lastDot === -1 ? '' : path.slice(0, lastDot + 1); + const leaf = lastDot === -1 ? path : path.slice(lastDot + 1); + const styledLeaf = + options.format === 'html' + ? `${leaf}` + : styleText('red', leaf); + listingItems.push({ + section: 'Removed features', + rendered: `${parent}${styledLeaf}`, + visibleLen: path.length, + desc: featureDescriptionDiff(path), + }); + } + for (const [from, to] of moves) { + const rendered = formatMove(from, to, options); + const visibleLen = + options.format === 'html' + ? rendered.replace(/<[^>]+>/g, '').length + : stripAnsi(rendered).length; + listingItems.push({ + section: 'Moved features', + rendered, + visibleLen, + desc: featureDescriptionDiff(to), + }); + } + + if (listingItems.length) { + const maxLen = Math.max(...listingItems.map((i) => i.visibleLen)); + const hasAnyDesc = listingItems.some((i) => i.desc); + let lastSection = ''; + for (const item of listingItems) { + if (item.section !== lastSection) { + if (lastSection) { + console.log(''); + } + const title = `${item.section}:`; + const styledTitle = + options.format === 'html' + ? `${title}` + : styleText('bold', title); + let header = styledTitle; + if (hasAnyDesc) { + const padding = ' '.repeat(Math.max(1, maxLen + 3 - title.length)); + const descLabel = 'description ='; + header += + padding + + (options.format === 'html' + ? 
`${descLabel}` + : styleText('italic', descLabel)); + } + console.log(header); + lastSection = item.section; + } + let line = ` ${item.rendered}`; + if (item.desc) { + const padding = ' '.repeat(1 + maxLen - item.visibleLen); + const styledDesc = + options.format === 'html' + ? `${item.desc}` + : styleText('italic', item.desc); + line += padding + styledDesc; + } + console.log(line); + } + console.log(''); + } + + if (addedFeatures.length || removedFeatures.length || moves.size) { + console.log(''); + } + for (const entry of entries) { /** @type {string | null} */ let previousKey = null;