Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,35 @@ vcr.requestPassThrough = (req) => {
};
```

### Body storage encoding
Bodies are stored in the cassette as readable text by default, but binary payloads (images, archives, executables, etc.) are base64-encoded so they survive the round-trip without corruption. VCR decides which encoding to use from the `content-type` and `content-encoding` headers of the request or response that carries the body.

The default policy, `defaultBase64EncodeBody`, stores a body as **text** when its content-type is a recognised text type (`text/*`, `application/json`, `application/xml`, the `+json` / `+xml` structured-syntax suffixes, `application/x-www-form-urlencoded`, etc.) or when no content-type is declared at all, and **base64-encodes** everything else. A body whose `content-encoding` or `content-type` mentions `gzip` is always base64-encoded, even if its content-type is a text type. This way the safe failure mode is "readable text stored as base64" rather than "binary corrupted into text".

If you need different behavior — for example, a custom API content-type that is actually text, or forcing a type to be base64-encoded — assign your own policy. It returns `true` to base64-encode the body:

```ts
import { VCR, defaultBase64EncodeBody } from 'socket-vcr-test';

const vcr = new VCR(...);

// Wrap the default and add your own rules.
// `contentType` and `contentEncoding` are pre-extracted from the headers;
// `headers` (a `Headers` instance for live traffic, or a plain record for a
// recorded interaction) are also passed in case you need other header values.
vcr.base64EncodeBody = (contentType, contentEncoding, headers) => {
// Force our custom protobuf type to be base64-encoded...
if (contentType.startsWith('application/x-acme-proto')) {
return true;
}

// ...and fall back to the built-in policy for everything else.
return defaultBase64EncodeBody(contentType, contentEncoding, headers);
};
```

The policy runs on **both** recording and playback, so it must make the same decision in each phase — otherwise a body stored as base64 could be replayed as text (or vice versa). Keep it stable for a given cassette.

## FAQ
### How can I pretty print JSON bodies?

Expand Down
154 changes: 86 additions & 68 deletions src/cassette.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { ClientRequestInterceptor } from '@mswjs/interceptors/ClientRequest';
import { BatchInterceptor } from '@mswjs/interceptors'


import { HttpInteraction, ICassetteStorage, IRequestMatcher, RecordMode, HttpRequest, HttpResponse, HttpRequestMasker, PassThroughHandler } from './types';
import { HttpInteraction, ICassetteStorage, IRequestMatcher, RecordMode, HttpRequest, HttpResponse, HttpRequestMasker, PassThroughHandler, Base64EncodeBody } from './types';
import { Readable } from 'node:stream';
import assert from 'node:assert';

Expand All @@ -29,6 +29,7 @@ export class Cassette {
private readonly mode: RecordMode,
private readonly masker: HttpRequestMasker,
private readonly passThroughHandler: PassThroughHandler | undefined,
private readonly base64EncodeBody: Base64EncodeBody = defaultBase64EncodeBody,
) {}

public isDone(): boolean {
Expand Down Expand Up @@ -83,8 +84,8 @@ export class Cassette {

const res: Response = response.clone();

const httpRequest = requestToHttpRequest(req, await consumeBody(req));
const httpResponse = responseToHttpResponse(res, await consumeBody(res));
const httpRequest = requestToHttpRequest(req, await consumeBody(req, this.base64EncodeBody));
const httpResponse = responseToHttpResponse(res, await consumeBody(res, this.base64EncodeBody));

this.masker(httpRequest);

Expand Down Expand Up @@ -121,7 +122,7 @@ export class Cassette {

private async playback(request: any): Promise<void> {
const req = request.clone();
const httpRequest = requestToHttpRequest(req, await consumeBody(req));
const httpRequest = requestToHttpRequest(req, await consumeBody(req, this.base64EncodeBody));
this.masker?.(httpRequest);
const match = this.findMatch(httpRequest);
if (!match) {
Expand All @@ -131,7 +132,7 @@ export class Cassette {
this.usedInteractions.add(match);

let body: string | Readable = match.response.body;
if (isBinaryMatch(match.response.headers)) {
if (shouldBase64(match.response.headers, this.base64EncodeBody)) {
const readable = new Readable();
readable._read = () => {};
readable.push(Buffer.from(match.response.body, 'base64'));
Expand All @@ -158,7 +159,7 @@ export class Cassette {
private async isPassThrough(request: any) {
if (this.passThroughHandler) {
const req = request.clone();
const httpRequest = requestToHttpRequest(req, await consumeBody(req));
const httpRequest = requestToHttpRequest(req, await consumeBody(req, this.base64EncodeBody));
return this.passThroughHandler(httpRequest);
}
return false;
Expand Down Expand Up @@ -211,76 +212,93 @@ export function responseToHttpResponse(response: any, body: string): HttpRespons
}
}

export async function consumeBody(req: Request | Response) {
if (isBinary(req.headers)) {
export async function consumeBody(
req: Request | Response,
base64EncodeBody: Base64EncodeBody = defaultBase64EncodeBody,
) {
if (shouldBase64(req.headers, base64EncodeBody)) {
return Buffer.from(await req.arrayBuffer()).toString('base64');
} else {
}

const contentLength = req.headers.get('content-length');
if (contentLength && parseInt(contentLength) > 1024 * 1024) { // > 1MB
const contentType = req.headers.get('content-type') ?? '???';
const contentLength = req.headers.get('content-length');

// Log potential streaming responses that aren't covered
if (contentLength && parseInt(contentLength) > 1024 * 1024) { // > 1MB
console.warn(`VCR: Large response detected (${contentLength} bytes) with content-type: ${contentType}. Consider adding this content-type to binary detection if it's a streaming response.`);
}

return await req.text()
// Recognised text types are stored verbatim, so a large one is inlined into
// the cassette as-is and bloats it. If this is really a streamed/binary
// payload mislabelled as text, give it a binary content-type instead.
console.warn(`VCR: Large response detected (${contentLength} bytes) with content-type: ${contentType}. It will be inlined into the cassette as text; if it is actually binary/streamed, serve it with a binary content-type.`);
}

return await req.text();
}

function isBinaryMatch(headers: Record<string, string>): boolean {
const encodingHeader = headers['content-encoding'] ?? '';
const contentHeader = headers['content-type'] ?? '';

// Check for gzip encoding
if (encodingHeader.indexOf('gzip') >= 0 || contentHeader.indexOf('gzip') >= 0) {
// Content types we know are text and can store verbatim in the cassette.
// Anything not on this list is base64-encoded (see defaultBase64EncodeBody), so the
// failure mode is "readable text stored as base64" rather than "binary corrupted".
const TEXT_CONTENT_TYPES = [
'application/json',
'application/xml',
'application/javascript',
'application/ecmascript',
'application/x-www-form-urlencoded',
'application/graphql',
'application/csp-report',
];

// Structured-syntax suffixes that are always text (e.g. image/svg+xml, application/ld+json).
const TEXT_CONTENT_TYPE_SUFFIXES = ['+json', '+xml'];

function isTextContentType(type: string): boolean {
if (type.startsWith('text/')) {
return true;
}

// Check for common binary content types
const binaryContentTypes = [
'application/octet-stream',
'application/x-binary',
'application/x-chrome-extension',
'application/x-executable',
'application/x-msdownload',
'application/zip',
'application/x-zip-compressed',
'application/pdf',
'image/',
'video/',
'audio/',
'font/',
'model/'
];

return binaryContentTypes.some(type => contentHeader.startsWith(type));
if (TEXT_CONTENT_TYPES.includes(type)) {
return true;
}
return TEXT_CONTENT_TYPE_SUFFIXES.some((suffix) => type.endsWith(suffix));
}

export function isBinary(headers: Headers): boolean {
const encodingHeader = headers.get('content-encoding') ?? '';
const contentHeader = headers.get('content-type') ?? '';

// Check for gzip encoding
if (encodingHeader.indexOf('gzip') >= 0 || contentHeader.indexOf('gzip') >= 0) {
return true;
// Reads a header from either a live `Headers` instance (case-insensitive) or a
// recorded plain record (keys are already lower-cased by responseToHttpResponse).
function getHeader(headers: Headers | Record<string, string>, name: string): string {
if (headers instanceof Headers) {
return headers.get(name) ?? '';
}

// Check for common binary content types
const binaryContentTypes = [
'application/octet-stream',
'application/x-binary',
'application/x-chrome-extension',
'application/x-executable',
'application/x-msdownload',
'application/zip',
'application/x-zip-compressed',
'application/pdf',
'image/',
'video/',
'audio/',
'font/',
'model/'
];

return binaryContentTypes.some(type => contentHeader.startsWith(type));
return headers[name] ?? '';
}

/**
 * Decides whether a body should be stored/read as base64 by asking the
 * configured policy.
 *
 * The `content-type` and `content-encoding` values are looked up first (an
 * absent header is handed to the policy as an empty string) and passed along
 * with the untouched `headers` object, so a policy can inspect other headers
 * when it needs to.
 *
 * @param headers - Either a live `Headers` instance or a recorded plain record.
 * @param base64EncodeBody - The policy to consult.
 * @returns Whatever the policy returns: `true` means "use base64".
 */
function shouldBase64(
  headers: Headers | Record<string, string>,
  base64EncodeBody: Base64EncodeBody,
): boolean {
  const contentType = getHeader(headers, 'content-type');
  const contentEncoding = getHeader(headers, 'content-encoding');

  return base64EncodeBody(contentType, contentEncoding, headers);
}

/**
 * Default body-storage policy: a body is base64-encoded unless it is
 * positively recognised as text. Override via `VCR#base64EncodeBody` to
 * customise.
 *
 * Header values are compared case-insensitively: HTTP content-coding names and
 * media types are case-insensitive, so `GZIP` and `Application/JSON` are
 * treated exactly like their lower-case spellings.
 *
 * @param contentType - Raw `content-type` header value (`''` when absent).
 * @param contentEncoding - Raw `content-encoding` header value (`''` when absent).
 * @returns `true` to store the body as base64, `false` to store it as text.
 */
export const defaultBase64EncodeBody: Base64EncodeBody = (contentType, contentEncoding) => {
  const normalisedType = contentType.toLowerCase();
  const normalisedEncoding = contentEncoding.toLowerCase();

  // Force gzip-encoded payloads to base64 storage. NOTE: by the time we observe
  // the body the HTTP client (undici/Node http) has usually already decompressed
  // it, so the bytes here are typically the decoded *text*, not gzip — this isn't
  // detecting binary, it's just pinning the storage representation. Record and
  // playback both key off this same header, so the round-trip stays consistent.
  if (normalisedEncoding.includes('gzip') || normalisedType.includes('gzip')) {
    return true;
  }

  // Strip any parameters (e.g. "; charset=utf-8"); the value is already lower-cased.
  const type = (normalisedType.split(';')[0] ?? '').trim();

  // No declared content type → assume text, keeping empty/plain bodies readable.
  if (!type) {
    return false;
  }

  return !isTextContentType(type);
};
94 changes: 84 additions & 10 deletions src/index.spec.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
import { test } from 'tap';
import { join } from 'node:path';
import { RecordMode, VCR } from './index';
import { RecordMode, VCR, DefaultRequestMatcher, HttpInteraction, ICassetteStorage } from './index';
import { FileStorage } from "./file-storage";
import { unlink } from 'node:fs/promises';
import { existsSync } from 'node:fs';

/**
 * Cassette storage that lives entirely in memory, so a test can pre-load
 * interactions and afterwards inspect what was written, all without touching
 * the filesystem.
 *
 * Reads and writes use separate maps: `load` only ever consults the seed
 * passed to the constructor, while `save` only ever writes to `saved`.
 */
class MemoryStorage implements ICassetteStorage {
  /** Interactions handed to `save`, keyed by cassette name. */
  public readonly saved: Record<string, HttpInteraction[]> = {};

  /**
   * @param seed - Interactions to serve from `load`, keyed by cassette name.
   */
  constructor(private readonly seed: Record<string, HttpInteraction[]> = {}) {}

  /** Resolves to the seeded interactions for `name`, or `undefined` if none were seeded. */
  async load(name: string): Promise<HttpInteraction[] | undefined> {
    const seededInteractions = this.seed[name];
    return seededInteractions;
  }

  /** Records `interactions` under `name` in `saved`, replacing any earlier entry. */
  async save(name: string, interactions: HttpInteraction[]): Promise<void> {
    const { saved } = this;
    saved[name] = interactions;
  }
}

// Helper functions to match axios API signature, minimizing the diff with the original code
async function fetchPost(url: string, data: string, config?: any) {
const response = await fetch(url, {
Expand Down Expand Up @@ -180,13 +196,13 @@ test('cassette', async (t) => {
}
})
const body = await res.arrayBuffer()
console.log(body)
// console.log(body)

const utf8Text = new TextDecoder().decode(body)
console.log(utf8Text.slice(0, 100))
// const utf8Text = new TextDecoder().decode(body)
// console.log(utf8Text.slice(0, 100))

const base64 = Buffer.from(body).toString('base64')
console.log(base64.slice(0, 100))
// console.log(base64.slice(0, 100))

t.equal(base64.slice(0, 10), 'H4sICAAAAA');
t.equal(base64.slice(-10), '+W2QBgCAA=');
Expand All @@ -206,21 +222,22 @@ test('cassette', async (t) => {
// Verify it's a binary response
const contentType = res.headers.get('content-type');
t.ok(contentType, 'Response should have content-type header');
console.log('Content-Type:', contentType);
// console.log('Content-Type:', contentType);

// Test that our binary detection is working
t.equal(contentType, 'application/x-chrome-extension', 'Content-type should be chrome extension');

// Import the binary detection function to test it directly
const { isBinary } = require('./cassette');
t.ok(isBinary(res.headers), 'isBinary should detect chrome extension as binary');
const { defaultBase64EncodeBody } = require('./cassette');
const contentEncoding = res.headers.get('content-encoding') ?? '';
t.ok(defaultBase64EncodeBody(contentType ?? '', contentEncoding, res.headers), 'defaultBase64EncodeBody should base64-encode a chrome extension body');

const body = await res.arrayBuffer()
console.log('Response size:', body.byteLength, 'bytes');
// console.log('Response size:', body.byteLength, 'bytes');

// Verify it's stored as base64 in the cassette
const base64 = Buffer.from(body).toString('base64')
console.log('Base64 length:', base64.length);
// console.log('Base64 length:', base64.length);

// Verify it's a valid binary file (should start with common binary signatures)
t.ok(body.byteLength > 0, 'Response should have content');
Expand Down Expand Up @@ -338,4 +355,61 @@ test('cassette', async (t) => {
});
});
});

t.test('base64EncodeBody option', async (t) => {
t.test('is honored on playback (decodes base64 the default would leave as text)', async (t) => {
// The body is base64 but the content-type is application/json, which the
// default policy stores/reads as verbatim text. Only a policy returning
// true will base64-decode it back into JSON on playback.
const url = 'https://example.test/data';
const seeded: HttpInteraction[] = [{
request: { url, method: 'GET', headers: {}, body: '' },
response: {
status: 200,
statusText: 'OK',
headers: { 'content-type': 'application/json' },
body: Buffer.from(JSON.stringify({ hello: 'world' })).toString('base64'),
},
}];
const storage = new MemoryStorage({ seed: seeded });

// Match on method + url only, so we don't have to reproduce the headers
// undici adds to the outgoing request.
const matcher = new DefaultRequestMatcher();
matcher.compareHeaders = false;
matcher.compareBody = false;

const vcr = new VCR(storage);
vcr.mode = RecordMode.none;
vcr.matcher = matcher;
vcr.base64EncodeBody = () => true;

await vcr.useCassette('seed', async () => {
const res = await fetch(url);
t.same(await res.json(), { hello: 'world' }, 'custom policy decoded the base64 body');
});
});

t.test('is honored on record (forces base64 storage of a JSON body)', async (t) => {
const storage = new MemoryStorage();

const vcr = new VCR(storage);
vcr.mode = RecordMode.once;
vcr.base64EncodeBody = () => true;

await vcr.useCassette('recorded', async () => {
await fetchPost('https://httpbin.org/post', JSON.stringify({ name: 'alex' }), {
headers: { 'Content-Type': 'application/json', 'Accept': 'application/json' },
});
});

const saved = storage.saved['recorded'] ?? [];
t.ok(saved.length > 0, 'an interaction was recorded');

const responseBody = saved[0]?.response.body ?? '';
t.notMatch(responseBody, /^\{/, 'JSON response was not stored as verbatim text');
const decoded = Buffer.from(responseBody, 'base64').toString('utf8');
t.match(decoded, /alex/, 'base64 body decodes back to the JSON payload');
});
});
});
2 changes: 1 addition & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
export * from './types';
export * from './default-request-matcher';
export * from './file-storage';
export { MatchNotFoundError } from './cassette';
export { MatchNotFoundError, defaultBase64EncodeBody } from './cassette';
export * from './vcr';
Loading