/**
 * Normalize a URL pathname by resolving `.` and `..` segments in one
 * left-to-right pass over the string.
 *
 * Segments are the substrings between `/` characters. While scanning:
 *  - empty segments (produced by a leading `/`, a trailing `/`, or a `//`
 *    run) are never added to `segments`;
 *  - `.` segments are dropped;
 *  - a `..` segment removes the most recently kept segment, unless there is
 *    none or that segment is itself `..`. In that case `..` is kept in
 *    `segments` when `allowAboveRoot` is true, and counted in `up` otherwise.
 *
 * @param {string} path - Combined pathname to normalize. Any falsy value
 *   yields an empty result.
 * @param {boolean} allowAboveRoot - Whether unresolved `..` segments are kept
 *   in the output instead of being counted in `up`.
 * @returns {{ segments: string[], up: number, trailingSlash: boolean }}
 *   `segments` holds the remaining non-empty segments in order. `up` is the
 *   number of `..` segments that could not be resolved; it is only ever
 *   non-zero when `allowAboveRoot` is false. `trailingSlash` is true when the
 *   last raw segment of a non-empty `path` was empty, `.`, or `..`.
 */
function normalizePathSegments(path, allowAboveRoot) {
  if (!path) return { segments: [], up: 0, trailingSlash: false };

  const len = path.length;
  const segments = [];
  let up = 0;
  let segStart = 0;
  // Raw text of the most recently terminated segment, before normalization.
  let lastSeg = '';

  // `i === len` acts as a virtual trailing '/', so the final segment is
  // flushed by the same code path as every other segment.
  for (let i = 0; i <= len; i++) {
    if (i < len && StringPrototypeCharCodeAt(path, i) !== CHAR_FORWARD_SLASH) {
      continue;
    }

    // Yields '' when two separators are adjacent or the path starts with '/'.
    lastSeg = StringPrototypeSlice(path, segStart, i);
    segStart = i + 1;

    if (lastSeg === '' || lastSeg === '.') {
      // Empty or current-directory segment: contributes nothing.
      continue;
    }

    if (lastSeg !== '..') {
      // Regular segment.
      ArrayPrototypePush(segments, lastSeg);
      continue;
    }

    // Parent-directory segment.
    const depth = segments.length;
    if (depth > 0 && segments[depth - 1] !== '..') {
      segments.pop();
    } else if (allowAboveRoot) {
      ArrayPrototypePush(segments, '..');
    } else {
      up++;
    }
  }

  // A path that ends with '/', '.', or '..' needs a trailing slash.
  const trailingSlash = lastSeg === '' || lastSeg === '.' || lastSeg === '..';

  return { segments, up, trailingSlash };
}

// define these here so at least they only have to be
// compiled once on the first module load.
const protocolPattern = /^[a-z0-9.+-]+:/i; @@ -127,6 +188,7 @@ const { CHAR_VERTICAL_LINE, CHAR_AT, CHAR_COLON, + CHAR_DOT, } = require('internal/constants'); let urlParseWarned = false; @@ -824,11 +886,14 @@ Url.prototype.resolveObject = function resolveObject(relative) { let mustEndAbs = (isRelAbs || isSourceAbs || (result.host && relative.pathname)); const removeAllDots = mustEndAbs; - let srcPath = (result.pathname && result.pathname.split('/')) || []; - const relPath = (relative.pathname && relative.pathname.split('/')) || []; const noLeadingSlashes = result.protocol && !slashedProtocol.has(result.protocol); + // Build the combined path string for normalization + let combinedPath = ''; + let srcHost = ''; // For noLeadingSlashes protocols + let relHost = ''; // For noLeadingSlashes protocols + // If the url is a non-slashed url, then relative // links like ../.. should be able // to crawl up to the hostname, as well. This is strange. @@ -837,22 +902,15 @@ Url.prototype.resolveObject = function resolveObject(relative) { if (noLeadingSlashes) { result.hostname = ''; result.port = null; - if (result.host) { - if (srcPath[0] === '') srcPath[0] = result.host; - else srcPath.unshift(result.host); - } + srcHost = result.host || ''; result.host = ''; if (relative.protocol) { relative.hostname = null; relative.port = null; result.auth = null; - if (relative.host) { - if (relPath[0] === '') relPath[0] = relative.host; - else relPath.unshift(relative.host); - } + relHost = relative.host || ''; relative.host = null; } - mustEndAbs &&= (relPath[0] === '' || srcPath[0] === ''); } if (isRelAbs) { @@ -868,30 +926,65 @@ Url.prototype.resolveObject = function resolveObject(relative) { } result.search = relative.search; result.query = relative.query; - srcPath = relPath; - // Fall through to the dot-handling below. 
- } else if (relPath.length) { + // Use relative path directly + if (noLeadingSlashes && relHost) { + combinedPath = (relative.pathname && relative.pathname.charAt(0) === '/' ? + relHost + relative.pathname : relHost + '/' + (relative.pathname || '')); + } else { + combinedPath = relative.pathname || ''; + } + } else if (relative.pathname) { // it's relative // throw away the existing file, and take the new path instead. - srcPath ||= []; - srcPath.pop(); - srcPath = srcPath.concat(relPath); result.search = relative.search; result.query = relative.query; + + // Build combined path: source path (minus last segment) + relative path + let srcPathname = result.pathname || ''; + if (noLeadingSlashes && srcHost) { + srcPathname = (srcPathname && srcPathname.charAt(0) === '/' ? + srcHost + srcPathname : srcHost + '/' + srcPathname); + } + + // Remove the last segment from source (the "file" part) + const lastSlashIndex = StringPrototypeLastIndexOf(srcPathname, '/'); + if (lastSlashIndex >= 0) { + srcPathname = StringPrototypeSlice(srcPathname, 0, lastSlashIndex + 1); + } else { + srcPathname = ''; + } + + // Append relative pathname + let relPathname = relative.pathname; + if (noLeadingSlashes && relHost) { + relPathname = (relPathname && relPathname.charAt(0) === '/' ? + relHost + relPathname : relHost + '/' + relPathname); + } + combinedPath = srcPathname + relPathname; } else if (relative.search !== null && relative.search !== undefined) { // Just pull out the search. // like href='?foo'. 
// Put this after the other two cases because it simplifies the booleans if (noLeadingSlashes) { - result.hostname = result.host = srcPath.shift(); + // Extract host from first segment of source path + const srcPathname = result.pathname || ''; + const firstSlashIdx = StringPrototypeIndexOf(srcPathname, '/'); + if (firstSlashIdx > 0) { + result.hostname = result.host = StringPrototypeSlice(srcPathname, 0, firstSlashIdx); + } else if (firstSlashIdx === -1 && srcPathname) { + result.hostname = result.host = srcPathname; + } else if (srcHost) { + result.hostname = result.host = srcHost; + } else { + result.hostname = result.host = ''; + } // Occasionally the auth can get stuck only in host. - // This especially happens in cases like - // url.resolveObject('mailto:local1@domain1', 'local2@domain2') const authInHost = - result.host && result.host.indexOf('@') > 0 && result.host.split('@'); + result.host && StringPrototypeIndexOf(result.host, '@') > 0; if (authInHost) { - result.auth = authInHost.shift(); - result.host = result.hostname = authInHost.shift(); + const atIdx = StringPrototypeIndexOf(result.host, '@'); + result.auth = StringPrototypeSlice(result.host, 0, atIdx); + result.host = result.hostname = StringPrototypeSlice(result.host, atIdx + 1); } } result.search = relative.search; @@ -903,9 +996,24 @@ Url.prototype.resolveObject = function resolveObject(relative) { } result.href = result.format(); return result; + } else { + // No relative path at all, use source path + if (noLeadingSlashes && srcHost) { + const srcPathname = result.pathname || ''; + combinedPath = (srcPathname && srcPathname.charAt(0) === '/' ? 
+ srcHost + srcPathname : srcHost + '/' + srcPathname); + } else { + combinedPath = result.pathname || ''; + } } - if (!srcPath.length) { + // Check if we need to handle noLeadingSlashes mustEndAbs + if (noLeadingSlashes) { + const startsWithSlash = combinedPath && combinedPath.charAt(0) === '/'; + mustEndAbs &&= startsWithSlash; + } + + if (!combinedPath) { // No path at all. All other things were already handled above. result.pathname = null; // To support http.request @@ -918,75 +1026,80 @@ Url.prototype.resolveObject = function resolveObject(relative) { return result; } - // If a url ENDs in . or .., then it must get a trailing slash. - // however, if it ends in anything else non-slashy, - // then it must NOT get a trailing slash. - let last = srcPath[srcPath.length - 1]; - const hasTrailingSlash = ( - ((result.host || relative.host || srcPath.length > 1) && - (last === '.' || last === '..')) || last === ''); - - // Strip single dots, resolve double dots to parent dir - // if the path tries to go above the root, `up` ends up > 0 - let up = 0; - for (let i = srcPath.length - 1; i >= 0; i--) { - last = srcPath[i]; - if (last === '.') { - spliceOne(srcPath, i); - } else if (last === '..') { - spliceOne(srcPath, i); - up++; - } else if (up) { - spliceOne(srcPath, i); - up--; + // Use optimized single-pass normalization (O(n) instead of O(n²)) + const allowAboveRoot = !mustEndAbs && !removeAllDots; + const { segments, up, trailingSlash } = normalizePathSegments(combinedPath, allowAboveRoot); + + // Determine if result needs trailing slash + // hasTrailingSlash is true if path ended with /, ., or .. + const hasTrailingSlash = trailingSlash && + (result.host || relative.host || segments.length > 0); + + // Handle remaining 'up' count - add leading .. if allowed + let srcPath = segments; + if (up > 0 && allowAboveRoot) { + // Prepend '..' 
segments for remaining up count + const newPath = []; + for (let i = 0; i < up; i++) { + ArrayPrototypePush(newPath, '..'); } - } - - // If the path is allowed to go above the root, restore leading ..s - if (!mustEndAbs && !removeAllDots) { - while (up--) { - srcPath.unshift('..'); + for (let i = 0; i < srcPath.length; i++) { + ArrayPrototypePush(newPath, srcPath[i]); } + srcPath = newPath; } - if (mustEndAbs && srcPath[0] !== '' && - (!srcPath[0] || srcPath[0].charAt(0) !== '/')) { - srcPath.unshift(''); - } - - if (hasTrailingSlash && StringPrototypeAt(ArrayPrototypeJoin(srcPath, '/'), -1) !== '/') { - srcPath.push(''); + // Handle mustEndAbs - ensure path starts with / + let isAbsolute = srcPath.length > 0 && srcPath[0] === ''; + if (!isAbsolute && srcPath.length > 0 && srcPath[0] && + srcPath[0].charAt(0) === '/') { + isAbsolute = true; } - const isAbsolute = srcPath[0] === '' || - (srcPath[0] && srcPath[0].charAt(0) === '/'); - - // put the host back + // put the host back for noLeadingSlashes protocols if (noLeadingSlashes) { result.hostname = - result.host = isAbsolute ? '' : srcPath.length ? srcPath.shift() : ''; + result.host = isAbsolute ? '' : srcPath.length ? srcPath[0] : ''; + if (result.host) { + // Remove the host from srcPath (first element) + srcPath = srcPath.length > 1 ? + ArrayPrototypeJoin(srcPath, '/').slice(result.host.length + 1).split('/') : + []; + if (srcPath.length === 1 && srcPath[0] === '') srcPath = []; + } // Occasionally the auth can get stuck only in host. - // This especially happens in cases like - // url.resolveObject('mailto:local1@domain1', 'local2@domain2') - const authInHost = result.host && result.host.indexOf('@') > 0 ? 
- result.host.split('@') : false; + const authInHost = result.host && StringPrototypeIndexOf(result.host, '@') > 0; if (authInHost) { - result.auth = authInHost.shift(); - result.host = result.hostname = authInHost.shift(); + const atIdx = StringPrototypeIndexOf(result.host, '@'); + result.auth = StringPrototypeSlice(result.host, 0, atIdx); + result.host = result.hostname = StringPrototypeSlice(result.host, atIdx + 1); } } mustEndAbs ||= (result.host && srcPath.length); if (mustEndAbs && !isAbsolute) { - srcPath.unshift(''); + // Need to add leading empty string for absolute path + const newPath = ['']; + for (let i = 0; i < srcPath.length; i++) { + ArrayPrototypePush(newPath, srcPath[i]); + } + srcPath = newPath; + isAbsolute = true; + } + + // Handle trailing slash + if (hasTrailingSlash) { + if (srcPath.length === 0 || srcPath[srcPath.length - 1] !== '') { + ArrayPrototypePush(srcPath, ''); + } } if (!srcPath.length) { result.pathname = null; result.path = null; } else { - result.pathname = srcPath.join('/'); + result.pathname = ArrayPrototypeJoin(srcPath, '/'); } // To support request.http