From 4b4fba81f73fd3d73c91433404844e35cad1e4c6 Mon Sep 17 00:00:00 2001 From: Mix Irving Date: Tue, 16 Jun 2026 14:55:35 +1200 Subject: [PATCH 1/2] make base64 encoding default, allowlist some known text formats --- src/cassette.ts | 123 ++++++++++++++++++++++++---------------------- src/index.spec.ts | 14 +++--- 2 files changed, 71 insertions(+), 66 deletions(-) diff --git a/src/cassette.ts b/src/cassette.ts index 4ca6437..75f5a19 100755 --- a/src/cassette.ts +++ b/src/cassette.ts @@ -214,73 +214,78 @@ export function responseToHttpResponse(response: any, body: string): HttpRespons export async function consumeBody(req: Request | Response) { if (isBinary(req.headers)) { return Buffer.from(await req.arrayBuffer()).toString('base64'); - } else { + } + + const contentLength = req.headers.get('content-length'); + if (contentLength && parseInt(contentLength) > 1024 * 1024) { // > 1MB const contentType = req.headers.get('content-type') ?? '???'; - const contentLength = req.headers.get('content-length'); - - // Log potential streaming responses that aren't covered - if (contentLength && parseInt(contentLength) > 1024 * 1024) { // > 1MB - console.warn(`VCR: Large response detected (${contentLength} bytes) with content-type: ${contentType}. Consider adding this content-type to binary detection if it's a streaming response.`); - } - - return await req.text() + // Recognised text types are stored verbatim, so a large one is inlined into + // the cassette as-is and bloats it. If this is really a streamed/binary + // payload mislabelled as text, give it a binary content-type instead. + console.warn(`VCR: Large response detected (${contentLength} bytes) with content-type: ${contentType}. It will be inlined into the cassette as text; if it is actually binary/streamed, serve it with a binary content-type.`); } + + return await req.text(); } -function isBinaryMatch(headers: Record): boolean { - const encodingHeader = headers['content-encoding'] ?? 
''; - const contentHeader = headers['content-type'] ?? ''; - - // Check for gzip encoding - if (encodingHeader.indexOf('gzip') >= 0 || contentHeader.indexOf('gzip') >= 0) { +// Content types we know are text and can store verbatim in the cassette. +// Anything not on this list is treated as binary (see isBinaryContent), so the +// failure mode is "readable text stored as base64" rather than "binary corrupted". +const TEXT_CONTENT_TYPES = [ + 'application/json', + 'application/xml', + 'application/javascript', + 'application/ecmascript', + 'application/x-www-form-urlencoded', + 'application/graphql', + 'application/csp-report', +]; + +// Structured-syntax suffixes that are always text (e.g. image/svg+xml, application/ld+json). +const TEXT_CONTENT_TYPE_SUFFIXES = ['+json', '+xml']; + +function isTextContentType(type: string): boolean { + if (type.startsWith('text/')) { return true; } - - // Check for common binary content types - const binaryContentTypes = [ - 'application/octet-stream', - 'application/x-binary', - 'application/x-chrome-extension', - 'application/x-executable', - 'application/x-msdownload', - 'application/zip', - 'application/x-zip-compressed', - 'application/pdf', - 'image/', - 'video/', - 'audio/', - 'font/', - 'model/' - ]; - - return binaryContentTypes.some(type => contentHeader.startsWith(type)); + if (TEXT_CONTENT_TYPES.includes(type)) { + return true; + } + return TEXT_CONTENT_TYPE_SUFFIXES.some((suffix) => type.endsWith(suffix)); } -export function isBinary(headers: Headers): boolean { - const encodingHeader = headers.get('content-encoding') ?? ''; - const contentHeader = headers.get('content-type') ?? ''; - - // Check for gzip encoding - if (encodingHeader.indexOf('gzip') >= 0 || contentHeader.indexOf('gzip') >= 0) { +// Shared core: a payload is binary unless we positively recognise it as text. +function isBinaryContent(contentType: string, contentEncoding: string): boolean { + // Force gzip-encoded payloads to base64 storage. 
NOTE: by the time we observe + // the body the HTTP client (undici/Node http) has usually already decompressed + // it, so the bytes here are typically the decoded *text*, not gzip — this isn't + // detecting binary, it's just pinning the storage representation. Record and + // playback both key off this same header, so the round-trip stays consistent. + if (contentEncoding.indexOf('gzip') >= 0 || contentType.indexOf('gzip') >= 0) { return true; } - - // Check for common binary content types - const binaryContentTypes = [ - 'application/octet-stream', - 'application/x-binary', - 'application/x-chrome-extension', - 'application/x-executable', - 'application/x-msdownload', - 'application/zip', - 'application/x-zip-compressed', - 'application/pdf', - 'image/', - 'video/', - 'audio/', - 'font/', - 'model/' - ]; - - return binaryContentTypes.some(type => contentHeader.startsWith(type)); + + // Strip any parameters (e.g. "; charset=utf-8") and normalise. + const type = (contentType.split(';')[0] ?? '').trim().toLowerCase(); + + // No declared content type → assume text, keeping empty/plain bodies readable. + if (!type) { + return false; + } + + return !isTextContentType(type); +} + +function isBinaryMatch(headers: Record): boolean { + return isBinaryContent( + headers['content-type'] ?? '', + headers['content-encoding'] ?? '' + ); +} + +export function isBinary(headers: Headers): boolean { + return isBinaryContent( + headers.get('content-type') ?? '', + headers.get('content-encoding') ?? 
'' + ); } diff --git a/src/index.spec.ts b/src/index.spec.ts index ff4de4e..f162f54 100755 --- a/src/index.spec.ts +++ b/src/index.spec.ts @@ -180,13 +180,13 @@ test('cassette', async (t) => { } }) const body = await res.arrayBuffer() - console.log(body) + // console.log(body) - const utf8Text = new TextDecoder().decode(body) - console.log(utf8Text.slice(0, 100)) + // const utf8Text = new TextDecoder().decode(body) + // console.log(utf8Text.slice(0, 100)) const base64 = Buffer.from(body).toString('base64') - console.log(base64.slice(0, 100)) + // console.log(base64.slice(0, 100)) t.equal(base64.slice(0, 10), 'H4sICAAAAA'); t.equal(base64.slice(-10), '+W2QBgCAA='); @@ -206,7 +206,7 @@ test('cassette', async (t) => { // Verify it's a binary response const contentType = res.headers.get('content-type'); t.ok(contentType, 'Response should have content-type header'); - console.log('Content-Type:', contentType); + // console.log('Content-Type:', contentType); // Test that our binary detection is working t.equal(contentType, 'application/x-chrome-extension', 'Content-type should be chrome extension'); @@ -216,11 +216,11 @@ test('cassette', async (t) => { t.ok(isBinary(res.headers), 'isBinary should detect chrome extension as binary'); const body = await res.arrayBuffer() - console.log('Response size:', body.byteLength, 'bytes'); + // console.log('Response size:', body.byteLength, 'bytes'); // Verify it's stored as base64 in the cassette const base64 = Buffer.from(body).toString('base64') - console.log('Base64 length:', base64.length); + // console.log('Base64 length:', base64.length); // Verify it's a valid binary file (should start with common binary signatures) t.ok(body.byteLength > 0, 'Response should have content'); From 69708126819409a0a217522840b5c9b08d4e17e1 Mon Sep 17 00:00:00 2001 From: Mix Irving Date: Tue, 16 Jun 2026 15:39:24 +1200 Subject: [PATCH 2/2] allow custom override of base64 encoding decision --- README.md | 29 +++++++++++++++++ src/cassette.ts | 65 
+++++++++++++++++++++++--------------- src/index.spec.ts | 80 +++++++++++++++++++++++++++++++++++++++++++++-- src/index.ts | 2 +- src/types.ts | 15 +++++++++ src/vcr.ts | 7 +++-- 6 files changed, 165 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index c1f4634..9ac3814 100644 --- a/README.md +++ b/README.md @@ -202,6 +202,35 @@ vcr.requestPassThrough = (req) => { }; ``` +### Body storage encoding +Bodies whose content-type is recognised as text are stored in the cassette as readable text; everything else — including binary payloads (images, archives, executables, etc.) — is base64-encoded so it survives the round-trip without corruption. VCR decides which encoding to use from the body's `content-type` (and `content-encoding`) headers. + +The default policy, `defaultBase64EncodeBody`, stores a body as **text** only when its content-type is a recognised text type (`text/*`, `application/json`, `application/xml`, the `+json` / `+xml` structured-syntax suffixes, `application/x-www-form-urlencoded`, etc.) and **base64-encodes** everything else. Two special cases: a body with no content-type at all is stored as text, and a body whose `content-encoding` or `content-type` mentions `gzip` is always base64-encoded. This way the safe failure mode is "readable text stored as base64" rather than "binary corrupted into text". + +If you need different behavior — for example, a custom API content-type that is actually text, or forcing a type to be base64-encoded — assign your own policy. It returns `true` to base64-encode the body: + +```ts +import { VCR, defaultBase64EncodeBody } from 'socket-vcr-test'; + +const vcr = new VCR(...); + +// Wrap the default and add your own rules. +// `contentType` and `contentEncoding` are pre-extracted from the headers; +// `headers` (a `Headers` instance for live traffic, or a plain record for a +// recorded interaction) are also passed in case you need other header values. +vcr.base64EncodeBody = (contentType, contentEncoding, headers) => { + // Force our custom protobuf type to be base64-encoded... + if (contentType.startsWith('application/x-acme-proto')) { + return true; + } + + // ...and fall back to the built-in policy for everything else. 
+ return defaultBase64EncodeBody(contentType, contentEncoding, headers); +}; +``` + +The policy runs on **both** recording and playback, so it must make the same decision in each phase — otherwise a body stored as base64 could be replayed as text (or vice versa). Keep it stable for a given cassette. + ## FAQ ### How can I pretty print JSON bodies? diff --git a/src/cassette.ts b/src/cassette.ts index 75f5a19..2dcab38 100755 --- a/src/cassette.ts +++ b/src/cassette.ts @@ -3,7 +3,7 @@ import { ClientRequestInterceptor } from '@mswjs/interceptors/ClientRequest'; import { BatchInterceptor } from '@mswjs/interceptors' -import { HttpInteraction, ICassetteStorage, IRequestMatcher, RecordMode, HttpRequest, HttpResponse, HttpRequestMasker, PassThroughHandler } from './types'; +import { HttpInteraction, ICassetteStorage, IRequestMatcher, RecordMode, HttpRequest, HttpResponse, HttpRequestMasker, PassThroughHandler, Base64EncodeBody } from './types'; import { Readable } from 'node:stream'; import assert from 'node:assert'; @@ -29,6 +29,7 @@ export class Cassette { private readonly mode: RecordMode, private readonly masker: HttpRequestMasker, private readonly passThroughHandler: PassThroughHandler | undefined, + private readonly base64EncodeBody: Base64EncodeBody = defaultBase64EncodeBody, ) {} public isDone(): boolean { @@ -83,8 +84,8 @@ export class Cassette { const res: Response = response.clone(); - const httpRequest = requestToHttpRequest(req, await consumeBody(req)); - const httpResponse = responseToHttpResponse(res, await consumeBody(res)); + const httpRequest = requestToHttpRequest(req, await consumeBody(req, this.base64EncodeBody)); + const httpResponse = responseToHttpResponse(res, await consumeBody(res, this.base64EncodeBody)); this.masker(httpRequest); @@ -121,7 +122,7 @@ export class Cassette { private async playback(request: any): Promise { const req = request.clone(); - const httpRequest = requestToHttpRequest(req, await consumeBody(req)); + const httpRequest = 
requestToHttpRequest(req, await consumeBody(req, this.base64EncodeBody)); this.masker?.(httpRequest); const match = this.findMatch(httpRequest); if (!match) { @@ -131,7 +132,7 @@ export class Cassette { this.usedInteractions.add(match); let body: string | Readable = match.response.body; - if (isBinaryMatch(match.response.headers)) { + if (shouldBase64(match.response.headers, this.base64EncodeBody)) { const readable = new Readable(); readable._read = () => {}; readable.push(Buffer.from(match.response.body, 'base64')); @@ -158,7 +159,7 @@ export class Cassette { private async isPassThrough(request: any) { if (this.passThroughHandler) { const req = request.clone(); - const httpRequest = requestToHttpRequest(req, await consumeBody(req)); + const httpRequest = requestToHttpRequest(req, await consumeBody(req, this.base64EncodeBody)); return this.passThroughHandler(httpRequest); } return false; @@ -211,8 +212,11 @@ export function responseToHttpResponse(response: any, body: string): HttpRespons } } -export async function consumeBody(req: Request | Response) { - if (isBinary(req.headers)) { +export async function consumeBody( + req: Request | Response, + base64EncodeBody: Base64EncodeBody = defaultBase64EncodeBody, +) { + if (shouldBase64(req.headers, base64EncodeBody)) { return Buffer.from(await req.arrayBuffer()).toString('base64'); } @@ -229,7 +233,7 @@ export async function consumeBody(req: Request | Response) { } // Content types we know are text and can store verbatim in the cassette. -// Anything not on this list is treated as binary (see isBinaryContent), so the +// Anything not on this list is base64-encoded (see defaultBase64EncodeBody), so the // failure mode is "readable text stored as base64" rather than "binary corrupted". 
const TEXT_CONTENT_TYPES = [ 'application/json', @@ -254,8 +258,31 @@ function isTextContentType(type: string): boolean { return TEXT_CONTENT_TYPE_SUFFIXES.some((suffix) => type.endsWith(suffix)); } -// Shared core: a payload is binary unless we positively recognise it as text. -function isBinaryContent(contentType: string, contentEncoding: string): boolean { +// Reads a header from either a live `Headers` instance (case-insensitive) or a +// recorded plain record (keys are already lower-cased by responseToHttpResponse). +function getHeader(headers: Headers | Record, name: string): string { + if (headers instanceof Headers) { + return headers.get(name) ?? ''; + } + return headers[name] ?? ''; +} + +// Extracts content-type/content-encoding from the headers and delegates to the +// configured policy. +function shouldBase64( + headers: Headers | Record, + base64EncodeBody: Base64EncodeBody, +): boolean { + return base64EncodeBody( + getHeader(headers, 'content-type'), + getHeader(headers, 'content-encoding'), + headers, + ); +} + +// Default policy: a body is base64-encoded unless we positively recognise it as text. +// Override via `VCR#base64EncodeBody` to customise. +export const defaultBase64EncodeBody: Base64EncodeBody = (contentType, contentEncoding) => { // Force gzip-encoded payloads to base64 storage. NOTE: by the time we observe // the body the HTTP client (undici/Node http) has usually already decompressed // it, so the bytes here are typically the decoded *text*, not gzip — this isn't @@ -274,18 +301,4 @@ function isBinaryContent(contentType: string, contentEncoding: string): boolean } return !isTextContentType(type); -} - -function isBinaryMatch(headers: Record): boolean { - return isBinaryContent( - headers['content-type'] ?? '', - headers['content-encoding'] ?? '' - ); -} - -export function isBinary(headers: Headers): boolean { - return isBinaryContent( - headers.get('content-type') ?? '', - headers.get('content-encoding') ?? 
'' - ); -} +}; diff --git a/src/index.spec.ts b/src/index.spec.ts index f162f54..da8cc90 100755 --- a/src/index.spec.ts +++ b/src/index.spec.ts @@ -1,10 +1,26 @@ import { test } from 'tap'; import { join } from 'node:path'; -import { RecordMode, VCR } from './index'; +import { RecordMode, VCR, DefaultRequestMatcher, HttpInteraction, ICassetteStorage } from './index'; import { FileStorage } from "./file-storage"; import { unlink } from 'node:fs/promises'; import { existsSync } from 'node:fs'; +// In-memory cassette storage so tests can seed interactions and inspect what +// gets saved without touching the filesystem. +class MemoryStorage implements ICassetteStorage { + public readonly saved: Record = {}; + + constructor(private readonly seed: Record = {}) {} + + async load(name: string): Promise { + return this.seed[name]; + } + + async save(name: string, interactions: HttpInteraction[]): Promise { + this.saved[name] = interactions; + } +} + // Helper functions to match axios API signature, minimizing the diff with the original code async function fetchPost(url: string, data: string, config?: any) { const response = await fetch(url, { @@ -212,8 +228,9 @@ test('cassette', async (t) => { t.equal(contentType, 'application/x-chrome-extension', 'Content-type should be chrome extension'); // Import the binary detection function to test it directly - const { isBinary } = require('./cassette'); - t.ok(isBinary(res.headers), 'isBinary should detect chrome extension as binary'); + const { defaultBase64EncodeBody } = require('./cassette'); + const contentEncoding = res.headers.get('content-encoding') ?? ''; + t.ok(defaultBase64EncodeBody(contentType ?? 
'', contentEncoding, res.headers), 'defaultBase64EncodeBody should base64-encode a chrome extension body'); const body = await res.arrayBuffer() // console.log('Response size:', body.byteLength, 'bytes'); @@ -338,4 +355,61 @@ test('cassette', async (t) => { }); }); }); + + t.test('base64EncodeBody option', async (t) => { + t.test('is honored on playback (decodes base64 the default would leave as text)', async (t) => { + // The body is base64 but the content-type is application/json, which the + // default policy stores/reads as verbatim text. Only a policy returning + // true will base64-decode it back into JSON on playback. + const url = 'https://example.test/data'; + const seeded: HttpInteraction[] = [{ + request: { url, method: 'GET', headers: {}, body: '' }, + response: { + status: 200, + statusText: 'OK', + headers: { 'content-type': 'application/json' }, + body: Buffer.from(JSON.stringify({ hello: 'world' })).toString('base64'), + }, + }]; + const storage = new MemoryStorage({ seed: seeded }); + + // Match on method + url only, so we don't have to reproduce the headers + // undici adds to the outgoing request. 
+ const matcher = new DefaultRequestMatcher(); + matcher.compareHeaders = false; + matcher.compareBody = false; + + const vcr = new VCR(storage); + vcr.mode = RecordMode.none; + vcr.matcher = matcher; + vcr.base64EncodeBody = () => true; + + await vcr.useCassette('seed', async () => { + const res = await fetch(url); + t.same(await res.json(), { hello: 'world' }, 'custom policy decoded the base64 body'); + }); + }); + + t.test('is honored on record (forces base64 storage of a JSON body)', async (t) => { + const storage = new MemoryStorage(); + + const vcr = new VCR(storage); + vcr.mode = RecordMode.once; + vcr.base64EncodeBody = () => true; + + await vcr.useCassette('recorded', async () => { + await fetchPost('https://httpbin.org/post', JSON.stringify({ name: 'alex' }), { + headers: { 'Content-Type': 'application/json', 'Accept': 'application/json' }, + }); + }); + + const saved = storage.saved['recorded'] ?? []; + t.ok(saved.length > 0, 'an interaction was recorded'); + + const responseBody = saved[0]?.response.body ?? 
''; + t.notMatch(responseBody, /^\{/, 'JSON response was not stored as verbatim text'); + const decoded = Buffer.from(responseBody, 'base64').toString('utf8'); + t.match(decoded, /alex/, 'base64 body decodes back to the JSON payload'); + }); + }); }); diff --git a/src/index.ts b/src/index.ts index 7949b03..8ac5c28 100755 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,5 @@ export * from './types'; export * from './default-request-matcher'; export * from './file-storage'; -export { MatchNotFoundError } from './cassette'; +export { MatchNotFoundError, defaultBase64EncodeBody } from './cassette'; export * from './vcr'; diff --git a/src/types.ts b/src/types.ts index 7d5bf15..4a223bc 100755 --- a/src/types.ts +++ b/src/types.ts @@ -83,3 +83,18 @@ export type HttpRequestMasker = (httpRequest: HttpRequest) => void; * A function that allows an HTTP request to pass through (never be recorded) */ export type PassThroughHandler = (httpRequest: HttpRequest) => boolean; + +/** + * Decides whether a request/response body should be stored base64-encoded + * (returning `true`) rather than as verbatim text. + * + * `contentType` and `contentEncoding` are pre-extracted for convenience; the full + * `headers` are also provided (either as a `Headers` instance for live traffic, or + * a plain record for a recorded cassette interaction) so custom policies can + * inspect other headers if needed. 
+ */ +export type Base64EncodeBody = ( + contentType: string, + contentEncoding: string, + headers: Headers | Record, +) => boolean; diff --git a/src/vcr.ts b/src/vcr.ts index 4fd41ea..201e7da 100755 --- a/src/vcr.ts +++ b/src/vcr.ts @@ -1,7 +1,7 @@ import { setTimeout } from 'node:timers/promises'; -import { HttpRequestMasker, ICassetteStorage, IRequestMatcher, PassThroughHandler, RecordMode } from './types'; +import { Base64EncodeBody, HttpRequestMasker, ICassetteStorage, IRequestMatcher, PassThroughHandler, RecordMode } from './types'; import { DefaultRequestMatcher } from './default-request-matcher'; -import { Cassette } from './cassette'; +import { Cassette, defaultBase64EncodeBody } from './cassette'; const ENV_TO_RECORD_MODE: Record = { [RecordMode.none]: RecordMode.none, @@ -16,13 +16,14 @@ export class VCR { public requestMasker: HttpRequestMasker = () => {}; public requestPassThrough?: PassThroughHandler; public mode: RecordMode = RecordMode.once; + public base64EncodeBody: Base64EncodeBody = defaultBase64EncodeBody; constructor (private readonly storage: ICassetteStorage) {} public async useCassette(name: string, action: () => Promise) { const mode = ENV_TO_RECORD_MODE[process.env['VCR_MODE'] ?? this.mode] ?? this.mode; - var cassette = new Cassette(this.storage, this.matcher, name, mode, this.requestMasker, this.requestPassThrough); + var cassette = new Cassette(this.storage, this.matcher, name, mode, this.requestMasker, this.requestPassThrough, this.base64EncodeBody); await cassette.mount(); try { await action();