Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 185 additions & 72 deletions lib/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@

const {
ArrayPrototypeJoin,
ArrayPrototypePop,
ArrayPrototypePush,
Boolean,
Int8Array,
ObjectAssign,
ObjectKeys,
StringPrototypeAt,

Check failure on line 31 in lib/url.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'StringPrototypeAt' is assigned a value but never used
StringPrototypeCharCodeAt,
StringPrototypeIndexOf,
StringPrototypeLastIndexOf,
StringPrototypeReplaceAll,
StringPrototypeSlice,
decodeURIComponent,
Expand All @@ -52,7 +54,6 @@

// This ensures setURLConstructor() is called before the native
// URL::ToObject() method is used.
const { spliceOne } = require('internal/util');
const { isInsideNodeModules } = internalBinding('util');

// WHATWG URL implementation provided by internal/url
Expand Down Expand Up @@ -91,7 +92,67 @@

// Reference: RFC 3986, RFC 1808, RFC 2396

/**
 * Normalize URL path segments by resolving `.` and `..` in a single
 * left-to-right pass over the path string.
 *
 * Empty segments (a leading `/` or a doubled `//`) and `.` segments are
 * dropped. A `..` segment removes the most recently kept segment when one
 * exists and that segment is not itself `..`. Otherwise the `..` is kept in
 * `segments` when `allowAboveRoot` is true, or counted in `up` when it is
 * false.
 *
 * `trailingSlash` reports whether the last raw segment of `path`, before any
 * normalization, was empty, `.` or `..`; it is `false` for an empty path.
 * @param {string} path - Combined pathname to normalize
 * @param {boolean} allowAboveRoot - Whether unmatched `..` segments are kept
 *   in `segments` (true) or only counted in `up` (false)
 * @returns {{ segments: string[], up: number, trailingSlash: boolean }}
 */
function normalizePathSegments(path, allowAboveRoot) {
  if (!path) {
    return {
      __proto__: null,
      segments: [],
      up: 0,
      trailingSlash: false,
    };
  }

  const len = path.length;
  const segments = [];
  let up = 0;
  let segStart = 0;
  let lastSeg = '';

  // The extra iteration at `i === len` acts as a virtual terminating slash,
  // so the final segment is flushed by the same code as every other one.
  for (let i = 0; i <= len; i++) {
    if (i < len &&
        StringPrototypeCharCodeAt(path, i) !== CHAR_FORWARD_SLASH) {
      continue;
    }

    // `segStart === i` yields '', which covers both a leading slash and
    // consecutive slashes.
    lastSeg = StringPrototypeSlice(path, segStart, i);
    segStart = i + 1;

    if (lastSeg === '' || lastSeg === '.') {
      // Empty segment or current directory: nothing to keep.
      continue;
    }

    if (lastSeg !== '..') {
      // Regular segment.
      ArrayPrototypePush(segments, lastSeg);
      continue;
    }

    // Parent directory: cancel the previous real segment when there is one.
    const kept = segments.length;
    if (kept > 0 && segments[kept - 1] !== '..') {
      ArrayPrototypePop(segments);
    } else if (allowAboveRoot) {
      ArrayPrototypePush(segments, '..');
    } else {
      up++;
    }
  }

  // A path ending in '/', '.' or '..' needs a trailing slash once resolved.
  const trailingSlash =
    lastSeg === '' || lastSeg === '.' || lastSeg === '..';

  return {
    __proto__: null,
    segments,
    up,
    trailingSlash,
  };
}

// Define these here so at least they only have to be

Check failure on line 155 in lib/url.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

Comments should not begin with a lowercase character
// compiled once on the first module load.
const protocolPattern = /^[a-z0-9.+-]+:/i;
const portPattern = /:[0-9]*$/;
Expand Down Expand Up @@ -127,6 +188,7 @@
CHAR_VERTICAL_LINE,
CHAR_AT,
CHAR_COLON,
CHAR_DOT,

Check failure on line 191 in lib/url.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'CHAR_DOT' is assigned a value but never used
} = require('internal/constants');

let urlParseWarned = false;
Expand Down Expand Up @@ -824,11 +886,14 @@
let mustEndAbs = (isRelAbs || isSourceAbs ||
(result.host && relative.pathname));
const removeAllDots = mustEndAbs;
let srcPath = (result.pathname && result.pathname.split('/')) || [];
const relPath = (relative.pathname && relative.pathname.split('/')) || [];
const noLeadingSlashes = result.protocol &&
!slashedProtocol.has(result.protocol);

// Build the combined path string for normalization
let combinedPath = '';
let srcHost = ''; // For noLeadingSlashes protocols
let relHost = ''; // For noLeadingSlashes protocols

// If the url is a non-slashed url, then relative
// links like ../.. should be able
// to crawl up to the hostname, as well. This is strange.
Expand All @@ -837,22 +902,15 @@
if (noLeadingSlashes) {
result.hostname = '';
result.port = null;
if (result.host) {
if (srcPath[0] === '') srcPath[0] = result.host;
else srcPath.unshift(result.host);
}
srcHost = result.host || '';
result.host = '';
if (relative.protocol) {
relative.hostname = null;
relative.port = null;
result.auth = null;
if (relative.host) {
if (relPath[0] === '') relPath[0] = relative.host;
else relPath.unshift(relative.host);
}
relHost = relative.host || '';
relative.host = null;
}
mustEndAbs &&= (relPath[0] === '' || srcPath[0] === '');
}

if (isRelAbs) {
Expand All @@ -868,30 +926,65 @@
}
result.search = relative.search;
result.query = relative.query;
srcPath = relPath;
// Fall through to the dot-handling below.
} else if (relPath.length) {
// Use relative path directly
if (noLeadingSlashes && relHost) {
combinedPath = (relative.pathname && relative.pathname.charAt(0) === '/' ?
relHost + relative.pathname : relHost + '/' + (relative.pathname || ''));
} else {
combinedPath = relative.pathname || '';
}
} else if (relative.pathname) {
// it's relative
// throw away the existing file, and take the new path instead.
srcPath ||= [];
srcPath.pop();
srcPath = srcPath.concat(relPath);
result.search = relative.search;
result.query = relative.query;

// Build combined path: source path (minus last segment) + relative path
let srcPathname = result.pathname || '';
if (noLeadingSlashes && srcHost) {
srcPathname = (srcPathname && srcPathname.charAt(0) === '/' ?
srcHost + srcPathname : srcHost + '/' + srcPathname);
}

// Remove the last segment from source (the "file" part)
const lastSlashIndex = StringPrototypeLastIndexOf(srcPathname, '/');
if (lastSlashIndex >= 0) {
srcPathname = StringPrototypeSlice(srcPathname, 0, lastSlashIndex + 1);
} else {
srcPathname = '';
}

// Append relative pathname
let relPathname = relative.pathname;
if (noLeadingSlashes && relHost) {
relPathname = (relPathname && relPathname.charAt(0) === '/' ?
relHost + relPathname : relHost + '/' + relPathname);
}
combinedPath = srcPathname + relPathname;
} else if (relative.search !== null && relative.search !== undefined) {
// Just pull out the search.
// like href='?foo'.
// Put this after the other two cases because it simplifies the booleans
if (noLeadingSlashes) {
result.hostname = result.host = srcPath.shift();
// Extract host from first segment of source path
const srcPathname = result.pathname || '';
const firstSlashIdx = StringPrototypeIndexOf(srcPathname, '/');
if (firstSlashIdx > 0) {
result.hostname = result.host = StringPrototypeSlice(srcPathname, 0, firstSlashIdx);
} else if (firstSlashIdx === -1 && srcPathname) {
result.hostname = result.host = srcPathname;
} else if (srcHost) {
result.hostname = result.host = srcHost;
} else {
result.hostname = result.host = '';
}
// Occasionally the auth can get stuck only in host.
// This especially happens in cases like
// url.resolveObject('mailto:local1@domain1', 'local2@domain2')
const authInHost =
result.host && result.host.indexOf('@') > 0 && result.host.split('@');
result.host && StringPrototypeIndexOf(result.host, '@') > 0;
if (authInHost) {
result.auth = authInHost.shift();
result.host = result.hostname = authInHost.shift();
const atIdx = StringPrototypeIndexOf(result.host, '@');
result.auth = StringPrototypeSlice(result.host, 0, atIdx);
result.host = result.hostname = StringPrototypeSlice(result.host, atIdx + 1);
}
}
result.search = relative.search;
Expand All @@ -903,9 +996,24 @@
}
result.href = result.format();
return result;
} else {
// No relative path at all, use source path
if (noLeadingSlashes && srcHost) {

Check failure on line 1001 in lib/url.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

Unexpected if as the only statement in an else block
const srcPathname = result.pathname || '';
combinedPath = (srcPathname && srcPathname.charAt(0) === '/' ?
srcHost + srcPathname : srcHost + '/' + srcPathname);
} else {
combinedPath = result.pathname || '';
}
}

if (!srcPath.length) {
// Check if we need to handle noLeadingSlashes mustEndAbs
if (noLeadingSlashes) {
const startsWithSlash = combinedPath && combinedPath.charAt(0) === '/';
mustEndAbs &&= startsWithSlash;
}

if (!combinedPath) {
// No path at all. All other things were already handled above.
result.pathname = null;
// To support http.request
Expand All @@ -918,75 +1026,80 @@
return result;
}

// If a url ENDs in . or .., then it must get a trailing slash.
// however, if it ends in anything else non-slashy,
// then it must NOT get a trailing slash.
let last = srcPath[srcPath.length - 1];
const hasTrailingSlash = (
((result.host || relative.host || srcPath.length > 1) &&
(last === '.' || last === '..')) || last === '');

// Strip single dots, resolve double dots to parent dir
// if the path tries to go above the root, `up` ends up > 0
let up = 0;
for (let i = srcPath.length - 1; i >= 0; i--) {
last = srcPath[i];
if (last === '.') {
spliceOne(srcPath, i);
} else if (last === '..') {
spliceOne(srcPath, i);
up++;
} else if (up) {
spliceOne(srcPath, i);
up--;
// Use optimized single-pass normalization (O(n) instead of O(n²))
const allowAboveRoot = !mustEndAbs && !removeAllDots;
const { segments, up, trailingSlash } = normalizePathSegments(combinedPath, allowAboveRoot);

// Determine if result needs trailing slash
// hasTrailingSlash is true if path ended with /, ., or ..
const hasTrailingSlash = trailingSlash &&
(result.host || relative.host || segments.length > 0);

// Handle remaining 'up' count - add leading .. if allowed
let srcPath = segments;
if (up > 0 && allowAboveRoot) {
// Prepend '..' segments for remaining up count
const newPath = [];
for (let i = 0; i < up; i++) {
ArrayPrototypePush(newPath, '..');
}
}

// If the path is allowed to go above the root, restore leading ..s
if (!mustEndAbs && !removeAllDots) {
while (up--) {
srcPath.unshift('..');
for (let i = 0; i < srcPath.length; i++) {
ArrayPrototypePush(newPath, srcPath[i]);
}
srcPath = newPath;
}

if (mustEndAbs && srcPath[0] !== '' &&
(!srcPath[0] || srcPath[0].charAt(0) !== '/')) {
srcPath.unshift('');
}

if (hasTrailingSlash && StringPrototypeAt(ArrayPrototypeJoin(srcPath, '/'), -1) !== '/') {
srcPath.push('');
// Handle mustEndAbs - ensure path starts with /
let isAbsolute = srcPath.length > 0 && srcPath[0] === '';
if (!isAbsolute && srcPath.length > 0 && srcPath[0] &&
srcPath[0].charAt(0) === '/') {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
srcPath[0].charAt(0) === '/') {
srcPath[0][0] === '/') {

isAbsolute = true;
}

const isAbsolute = srcPath[0] === '' ||
(srcPath[0] && srcPath[0].charAt(0) === '/');

// put the host back
// Put the host back for noLeadingSlashes protocols

Check failure on line 1059 in lib/url.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

Comments should not begin with a lowercase character
if (noLeadingSlashes) {
result.hostname =
result.host = isAbsolute ? '' : srcPath.length ? srcPath.shift() : '';
result.host = isAbsolute ? '' : srcPath.length ? srcPath[0] : '';
if (result.host) {
// Remove the host from srcPath (first element)
srcPath = srcPath.length > 1 ?
ArrayPrototypeJoin(srcPath, '/').slice(result.host.length + 1).split('/') :
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Primordials

[];
if (srcPath.length === 1 && srcPath[0] === '') srcPath = [];
}
// Occasionally the auth can get stuck only in host.
// This especially happens in cases like
// url.resolveObject('mailto:local1@domain1', 'local2@domain2')
const authInHost = result.host && result.host.indexOf('@') > 0 ?
result.host.split('@') : false;
const authInHost = result.host && StringPrototypeIndexOf(result.host, '@') > 0;
if (authInHost) {
result.auth = authInHost.shift();
result.host = result.hostname = authInHost.shift();
const atIdx = StringPrototypeIndexOf(result.host, '@');
result.auth = StringPrototypeSlice(result.host, 0, atIdx);
result.host = result.hostname = StringPrototypeSlice(result.host, atIdx + 1);
}
}

mustEndAbs ||= (result.host && srcPath.length);

if (mustEndAbs && !isAbsolute) {
srcPath.unshift('');
// Need to add leading empty string for absolute path
const newPath = [''];
for (let i = 0; i < srcPath.length; i++) {
ArrayPrototypePush(newPath, srcPath[i]);
}
srcPath = newPath;
isAbsolute = true;
}

// Handle trailing slash
if (hasTrailingSlash) {
if (srcPath.length === 0 || srcPath[srcPath.length - 1] !== '') {
ArrayPrototypePush(srcPath, '');
}
}

if (!srcPath.length) {
result.pathname = null;
result.path = null;
} else {
result.pathname = srcPath.join('/');
result.pathname = ArrayPrototypeJoin(srcPath, '/');
}

// To support request.http
Expand Down
Loading