Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 65 additions & 6 deletions runtime/python_bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,65 @@ class ProtocolError(Exception):
class InstanceHandleError(ValueError):
"""Raised when an instance handle is unknown or no longer valid."""

# Unique sentinel returned by _deserialize_bytes_envelope when the value is not
# a bytes envelope; compared by identity so it cannot collide with decoded data.
_NO_DESERIALIZE = object()
# Messages carried by the ProtocolError raised for malformed bytes envelopes.
_ERR_BYTES_MISSING_B64 = 'Invalid bytes envelope: missing b64'
_ERR_BYTES_MISSING_DATA = 'Invalid bytes envelope: missing data'
_ERR_BYTES_INVALID_BASE64 = 'Invalid bytes envelope: invalid base64'


def _deserialize_bytes_envelope(value) -> object:
    """
    Decode base64-encoded bytes envelopes from JS into Python bytes.

    Supported shapes:
    - { "__tywrap_bytes__": true, "b64": "..." } (JS SafeCodec.encodeRequest)
    - { "__type__": "bytes", "encoding": "base64", "data": "..." } (legacy/compat)

    Why: TS SafeCodec encodes Uint8Array/ArrayBuffer as base64 objects, but
    Python handlers expect real bytes/bytearray to preserve behavior (e.g., len()).

    Returns:
        The decoded ``bytes`` when ``value`` is a recognized envelope, otherwise
        the ``_NO_DESERIALIZE`` sentinel (``value`` is not an envelope).

    Raises:
        ProtocolError: the envelope marker is present but the payload field is
            not a string, or the payload is not strictly valid base64.
    """
    if not isinstance(value, dict):
        return _NO_DESERIALIZE

    # The marker must be the literal True; truthy look-alikes (1, "true") are
    # treated as ordinary dicts.
    if value.get('__tywrap_bytes__') is True:
        return _decode_bytes_envelope_payload(value.get('b64'), _ERR_BYTES_MISSING_B64)

    if value.get('__type__') == 'bytes' and value.get('encoding') == 'base64':
        return _decode_bytes_envelope_payload(value.get('data'), _ERR_BYTES_MISSING_DATA)

    return _NO_DESERIALIZE


def _decode_bytes_envelope_payload(payload, missing_message: str) -> bytes:
    """Strictly base64-decode one envelope payload field.

    Raises ProtocolError(missing_message) when ``payload`` is not a string and
    ProtocolError(_ERR_BYTES_INVALID_BASE64) when it is not valid base64.
    """
    if not isinstance(payload, str):
        raise ProtocolError(missing_message)
    try:
        return base64.b64decode(payload, validate=True)
    except ValueError as exc:
        # binascii.Error is a ValueError subclass, and a non-ASCII str also
        # raises ValueError. Anything else (e.g. MemoryError) is not a
        # malformed-envelope condition and must not be relabelled as one.
        raise ProtocolError(_ERR_BYTES_INVALID_BASE64) from exc


def deserialize(value):
    """
    Walk a JSON-decoded request value and restore Python-native types.

    Bytes envelopes are replaced by the ``bytes`` they encode; lists and dicts
    are rebuilt with every element/value deserialized in turn (dict keys are
    kept as-is); any other value is returned unchanged.

    Why: requests arrive as plain JSON, so the few non-JSON types we support
    (currently only bytes) have to be decoded explicitly at the boundary.
    """
    envelope_result = _deserialize_bytes_envelope(value)
    if envelope_result is not _NO_DESERIALIZE:
        return envelope_result

    if isinstance(value, dict):
        # Same keys, recursively decoded values.
        rebuilt = {}
        for key, item in value.items():
            rebuilt[key] = deserialize(item)
        return rebuilt
    if isinstance(value, list):
        return list(map(deserialize, value))
    return value


_PROTOCOL_DIAGNOSTIC_MAX = 2048

Expand Down Expand Up @@ -648,8 +707,8 @@ def serialize_stdlib(obj):
def handle_call(params):
module_name = require_str(params, 'module')
function_name = require_str(params, 'functionName')
args = coerce_list(params.get('args'), 'args')
kwargs = coerce_dict(params.get('kwargs'), 'kwargs')
args = deserialize(coerce_list(params.get('args'), 'args'))
kwargs = deserialize(coerce_dict(params.get('kwargs'), 'kwargs'))
mod = importlib.import_module(module_name)
func = getattr(mod, function_name)
res = func(*args, **kwargs)
Expand All @@ -659,8 +718,8 @@ def handle_call(params):
def handle_instantiate(params):
module_name = require_str(params, 'module')
class_name = require_str(params, 'className')
args = coerce_list(params.get('args'), 'args')
kwargs = coerce_dict(params.get('kwargs'), 'kwargs')
args = deserialize(coerce_list(params.get('args'), 'args'))
kwargs = deserialize(coerce_dict(params.get('kwargs'), 'kwargs'))
mod = importlib.import_module(module_name)
cls = getattr(mod, class_name)
obj = cls(*args, **kwargs)
Expand All @@ -672,8 +731,8 @@ def handle_instantiate(params):
def handle_call_method(params):
handle_id = require_str(params, 'handle')
method_name = require_str(params, 'methodName')
args = coerce_list(params.get('args'), 'args')
kwargs = coerce_dict(params.get('kwargs'), 'kwargs')
args = deserialize(coerce_list(params.get('args'), 'args'))
kwargs = deserialize(coerce_dict(params.get('kwargs'), 'kwargs'))
if handle_id not in instances:
raise InstanceHandleError(f'Unknown instance handle: {handle_id}')
obj = instances[handle_id]
Expand Down
6 changes: 4 additions & 2 deletions src/runtime/process-io.ts
Original file line number Diff line number Diff line change
Expand Up @@ -733,8 +733,10 @@ export class ProcessIO extends BoundedContext implements Transport {
}
}, this.writeQueueTimeoutMs);

// Unref the timer so it doesn't keep the process alive
entry.timeoutHandle.unref();
// Unref the timer so it doesn't keep the process alive (best-effort for non-Node runtimes)
if (typeof entry.timeoutHandle.unref === 'function') {
entry.timeoutHandle.unref();
}

return entry;
}
Expand Down
87 changes: 85 additions & 2 deletions src/runtime/safe-codec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -319,12 +319,89 @@
private readonly rejectNonStringKeys: boolean;
private readonly maxPayloadBytes: number;
private readonly bytesHandling: 'base64' | 'reject' | 'passthrough';
private readonly reviveValueBound: (key: string, value: unknown) => unknown;
// Strict standard-alphabet base64: zero or more 4-character groups followed by
// an optional padded tail ("xx==" or "xxx="). The empty string matches;
// whitespace, unpadded input and the URL-safe alphabet ("-", "_") do not.
private static readonly base64Pattern =
/^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/;

Check warning on line 324 in src/runtime/safe-codec.ts

View workflow job for this annotation

GitHub Actions / lint

Unsafe Regular Expression

/**
 * Build a codec from optional settings; every omitted option falls back to a default:
 * special floats and non-string keys are rejected, the payload limit is
 * DEFAULT_MAX_PAYLOAD_BYTES, and bytes are handled as 'base64'.
 */
constructor(options: CodecOptions = {}) {
this.rejectSpecialFloats = options.rejectSpecialFloats ?? true;
this.rejectNonStringKeys = options.rejectNonStringKeys ?? true;
this.maxPayloadBytes = options.maxPayloadBytes ?? DEFAULT_MAX_PAYLOAD_BYTES;
this.bytesHandling = options.bytesHandling ?? 'base64';
// Bind once so the reviver can be handed to JSON.parse without losing `this`.
this.reviveValueBound = this.reviveValue.bind(this);
}

/**
 * Guard for bytes-envelope payloads: throws a decode-phase BridgeCodecError
 * unless `b64` matches SafeCodec.base64Pattern; returns nothing on success.
 */
private assertValidBase64(b64: string): void {
  const isWellFormed = SafeCodec.base64Pattern.test(b64);
  if (isWellFormed) {
    return;
  }
  throw new BridgeCodecError('Invalid base64 in bytes envelope', {
    codecPhase: 'decode',
    valueType: 'bytes',
  });
}

/**
 * Convert base64 string to Uint8Array.
 *
 * Why: Python bridge represents bytes/bytearray as base64 envelopes. Decoding them here
 * restores ergonomic JS types at the boundary.
 *
 * The result always owns its own ArrayBuffer, so `result.buffer` contains exactly the
 * decoded bytes and nothing else.
 *
 * @param b64 - Padded, standard-alphabet base64 text (validated before decoding).
 * @returns The decoded bytes.
 * @throws BridgeCodecError if `b64` is malformed or the runtime has no base64 decoder.
 */
private fromBase64(b64: string): Uint8Array {
  this.assertValidBase64(b64);

  if (typeof Buffer !== 'undefined') {
    // Copy instead of aliasing: Buffer.from(string) may be a slice of Node's shared
    // allocation pool, so a view over `buf.buffer` would expose unrelated memory
    // through `.buffer` and break code that assumes byteOffset === 0.
    return new Uint8Array(Buffer.from(b64, 'base64'));
  }
  if (typeof globalThis.atob === 'function') {
    // atob yields a "binary string": one character (code 0-255) per decoded byte.
    const bin = globalThis.atob(b64);
    const bytes = new Uint8Array(bin.length);
    for (let i = 0; i < bin.length; i += 1) {
      bytes[i] = bin.charCodeAt(i);
    }
    return bytes;
  }
  throw new BridgeCodecError('Base64 decoding is not available in this runtime', {
    codecPhase: 'decode',
    valueType: 'bytes',
  });
}

/**
 * Reviver passed to JSON.parse: swaps bytes envelopes for Uint8Array values.
 *
 * Recognized envelopes:
 * - { "__tywrap_bytes__": true, "b64": "..." } (JS SafeCodec.encodeRequest; also allowed in responses)
 * - { "__type__": "bytes", "encoding": "base64", "data": "..." } (Python SafeCodec default encoder)
 *
 * Anything else (primitives, null, arrays, other objects) is returned untouched.
 * A recognized envelope whose payload fails to decode raises a decode-phase
 * BridgeCodecError prefixed with "Bytes envelope decode failed".
 */
private reviveValue(_key: string, value: unknown): unknown {
  const isObjectRecord = typeof value === 'object' && value !== null && !Array.isArray(value);
  if (!isObjectRecord) {
    return value;
  }
  const candidate = value as Record<string, unknown>;

  // Pick the base64 payload of whichever envelope shape matches (tywrap shape first).
  let encoded: string | undefined;
  if (candidate.__tywrap_bytes__ === true && typeof candidate.b64 === 'string') {
    encoded = candidate.b64;
  } else if (
    candidate.__type__ === 'bytes' &&
    candidate.encoding === 'base64' &&
    typeof candidate.data === 'string'
  ) {
    encoded = candidate.data;
  }
  if (encoded === undefined) {
    return value;
  }

  try {
    return this.fromBase64(encoded);
  } catch (cause) {
    const detail = cause instanceof Error ? cause.message : String(cause);
    throw new BridgeCodecError(`Bytes envelope decode failed: ${detail}`, {
      codecPhase: 'decode',
      valueType: 'bytes',
    });
  }
}

private toBridgeExecutionError(error: NormalizedPythonError): BridgeExecutionError {
Expand Down Expand Up @@ -461,8 +538,11 @@
// Parse JSON
let parsed: unknown;
try {
parsed = JSON.parse(payload);
parsed = JSON.parse(payload, this.reviveValueBound);
} catch (err) {
if (err instanceof BridgeCodecError || err instanceof BridgeProtocolError) {
throw err;
}
const errorMessage = err instanceof Error ? err.message : String(err);
throw new BridgeCodecError(
`JSON parse failed: ${errorMessage}. Payload snippet: ${summarizePayloadForError(payload)}`,
Expand Down Expand Up @@ -510,8 +590,11 @@
// Parse JSON
let parsed: unknown;
try {
parsed = JSON.parse(payload);
parsed = JSON.parse(payload, this.reviveValueBound);
} catch (err) {
if (err instanceof BridgeCodecError || err instanceof BridgeProtocolError) {
throw err;
}
const errorMessage = err instanceof Error ? err.message : String(err);
throw new BridgeCodecError(
`JSON parse failed: ${errorMessage}. Payload snippet: ${summarizePayloadForError(payload)}`,
Expand Down
49 changes: 49 additions & 0 deletions test/runtime_node.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,55 @@ describeNodeOnly('Node.js Runtime Bridge', () => {
testTimeout
);

it(
'should roundtrip Uint8Array as Python bytes',
async () => {
const pythonAvailable = await isPythonAvailable();
if (!pythonAvailable || !isBridgeScriptAvailable()) return;

const input = new Uint8Array([72, 101, 108, 108, 111]); // "Hello"

const length = await bridge.call<number>('builtins', 'len', [input]);
expect(length).toBe(5);

const output = await bridge.call<Uint8Array>('builtins', 'bytes', [input]);
expect(output).toBeInstanceOf(Uint8Array);
expect(Array.from(output)).toEqual(Array.from(input));
},
testTimeout
);

it(
'should support zero-length Uint8Array as Python bytes',
async () => {
const pythonAvailable = await isPythonAvailable();
if (!pythonAvailable || !isBridgeScriptAvailable()) return;

const input = new Uint8Array([]);

const length = await bridge.call<number>('builtins', 'len', [input]);
expect(length).toBe(0);

const output = await bridge.call<Uint8Array>('builtins', 'bytes', [input]);
expect(output).toBeInstanceOf(Uint8Array);
expect(output.length).toBe(0);
},
testTimeout
);

it(
'should reject malformed bytes envelopes with explicit protocol error',
async () => {
const pythonAvailable = await isPythonAvailable();
if (!pythonAvailable || !isBridgeScriptAvailable()) return;

await expect(
bridge.call('builtins', 'len', [{ __tywrap_bytes__: true, b64: '%%%' }])
).rejects.toThrow(/Invalid bytes envelope: invalid base64/);
},
testTimeout
);

it(
'should handle function calls with kwargs',
async () => {
Expand Down
41 changes: 41 additions & 0 deletions test/safe-codec.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,47 @@ describe('decodeResponse - Basic', () => {
expect(result).toEqual({ a: 1 });
});

it('decodes Python bytes envelope (__type__: bytes) to Uint8Array', () => {
const payload = JSON.stringify({
id: 1,
result: { __type__: 'bytes', encoding: 'base64', data: 'SGVsbG8=' }, // "Hello"
});
const result = codec.decodeResponse<Uint8Array>(payload);
expect(result).toBeInstanceOf(Uint8Array);
expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]);
});

it('decodes Tywrap bytes envelope (__tywrap_bytes__) to Uint8Array', () => {
const payload = JSON.stringify({
id: 1,
result: { __tywrap_bytes__: true, b64: 'SGVsbG8=' }, // "Hello"
});
const result = codec.decodeResponse<Uint8Array>(payload);
expect(result).toBeInstanceOf(Uint8Array);
expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]);
});

it('decodes bytes envelope even when bytesHandling is reject', () => {
const rejectCodec = new SafeCodec({ bytesHandling: 'reject' });
const payload = JSON.stringify({
id: 1,
result: { __type__: 'bytes', encoding: 'base64', data: 'SGVsbG8=' }, // "Hello"
});
const result = rejectCodec.decodeResponse<Uint8Array>(payload);
expect(result).toBeInstanceOf(Uint8Array);
expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]);
});

it('surfaces invalid bytes envelope base64 as a codec error', () => {
const payload = JSON.stringify({
id: 1,
result: { __tywrap_bytes__: true, b64: '%%%' },
});
expect(() => codec.decodeResponse(payload)).toThrow(BridgeCodecError);
expect(() => codec.decodeResponse(payload)).toThrow(/Bytes envelope decode failed/);
expect(() => codec.decodeResponse(payload)).not.toThrow(/JSON parse failed/);
});

it('parses arrays', () => {
const result = codec.decodeResponse<number[]>('[1, 2, 3]');
expect(result).toEqual([1, 2, 3]);
Expand Down
Loading